|
import os
import pickle
from pathlib import Path

import pandas as pd
import sklearn
from datasets import load_dataset
from sklearn.compose import make_column_selector
from sklearn.compose import make_column_transformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from skops import card
from skops import hub_utils
|
|
|
# Hub access token used when pushing the repository.  Prefer the HF_TOKEN
# environment variable so a real secret never has to be written into this
# file; when it is unset or empty, fall back to the original placeholder
# (which must then be replaced by hand before pushing).
my_token = os.environ.get("HF_TOKEN") or "your token here"
|
|
|
|
|
# Load the "brendenc/Fish" dataset and materialise its whole "train" split
# as a pandas DataFrame.
dataset = load_dataset("brendenc/Fish")
df = pd.DataFrame(dataset["train"][:])

# Separate the regression target ("Weight") from the feature columns.
target = df["Weight"]
df = df.drop(columns="Weight")
|
|
|
|
|
# One-hot encode every object-dtype column and pass the remaining columns
# through unchanged.
#
# Dense output is requested with `sparse_threshold=0` on the column
# transformer rather than `sparse=False` on the encoder: that encoder keyword
# was deprecated in scikit-learn 1.2 and removed in 1.4 (it was renamed to
# `sparse_output`), so passing it raises a TypeError on current releases.
# `sparse_threshold=0` is accepted by old and new versions alike and makes
# the transformer always return a dense array.
one_hot_encoder = make_column_transformer(
    (
        OneHotEncoder(handle_unknown="ignore"),
        make_column_selector(dtype_include="object"),
    ),
    remainder="passthrough",
    sparse_threshold=0,
)

# Chain preprocessing and the regressor into a single estimator so both are
# fitted (and later pickled) together.  `random_state` is fixed for
# reproducible fits.
pipe = make_pipeline(
    one_hot_encoder,
    GradientBoostingRegressor(random_state=42),
)

# Fit on every row of `df`; this script does not hold out a test split.
pipe.fit(df, target)
|
|
|
|
|
# Locations used by the packaging steps: the pickle file holding the fitted
# pipeline and the local directory that is later passed to `hub_utils`.
local_repo = "fish-model"
model_path = "example.pkl"

# Serialise the fitted pipeline and write the bytes to `model_path`.
Path(model_path).write_bytes(pickle.dumps(pipe))
|
|
|
|
|
# Requirement string recording the scikit-learn release used for training.
# `sklearn` itself must be imported at the top of the file for
# `sklearn.__version__` to resolve; importing only its submodules with
# `from sklearn.x import y` does not bind the name `sklearn`.
sklearn_requirement = f"scikit-learn={sklearn.__version__}"

# Initialise the local repository directory `local_repo` from the pickled
# model, the requirement above, the task name and the feature frame `df`.
hub_utils.init(
    model=model_path,
    requirements=[sklearn_requirement],
    dst=local_repo,
    task="tabular-regression",
    data=df,
)
|
|
|
|
|
# Build the model card for the fitted pipeline, taking its metadata from the
# config in the local repository.  `local_repo` is used here (instead of a
# second hard-coded "fish-model") so the metadata is always read from the same
# directory the README is written to below.
model_card = card.Card(
    pipe,
    metadata=card.metadata_from_config(Path(local_repo)),
)

# Free-text sections shown on the card.
limitations = "This model is intended for educational purposes."
model_description = "This is a GradientBoostingRegressor on a fish dataset."
model_card_authors = "Brenden Connors"

model_card.add(
    model_card_authors=model_card_authors,
    limitations=limitations,
    model_description=model_description,
)

# License recorded in the card's metadata.
model_card.metadata.license = "mit"

# Write the card as the repository README.
model_card.save(Path(local_repo) / "README.md")
|
|
|
|
|
# Hub repository ids have the form "<namespace>/<repo_name>".  The previous
# value repeated the repo name ("scikit-learn/Fish-Weight/Fish-Weight"), which
# contains two slashes and is not a valid repo id.
repo_id = "scikit-learn/Fish-Weight"

# Upload the contents of `local_repo` to `repo_id`, authenticating with
# `my_token`; `create_remote=True` asks for the remote repo to be created.
hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=my_token,
    commit_message="Adding model files",
    create_remote=True,
)