import os
import pickle
from pathlib import Path

import pandas as pd
import sklearn
from datasets import load_dataset
from sklearn.compose import make_column_selector
from sklearn.compose import make_column_transformer
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder
from skops import card
from skops import hub_utils
|
|
# Hugging Face access token used when pushing the model repository.
# Prefer supplying it through the HF_TOKEN environment variable so a real
# secret never has to be written into this file; the placeholder is only the
# fallback and must be replaced (or the variable set) before pushing.
my_token = os.environ.get("HF_TOKEN", "your token here")
|
|
| |
# Fetch the Fish dataset and materialise its "train" split as a DataFrame.
dataset = load_dataset("brendenc/Fish")
df = pd.DataFrame(dataset["train"][:])

# Separate the regression target ("Weight") from the feature columns.
target = df["Weight"]
df = df.drop(columns=["Weight"])
|
|
| |
# Dense one-hot encoder that ignores categories unseen during fitting.
# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
# `sparse_output` and 1.4 removed the old name, so try the current spelling
# first and fall back to the legacy one on older releases.
try:
    _dense_encoder = OneHotEncoder(sparse_output=False, handle_unknown="ignore")
except TypeError:  # scikit-learn < 1.2 only knows `sparse`
    _dense_encoder = OneHotEncoder(sparse=False, handle_unknown="ignore")

# Encode every object-dtype column; all remaining columns pass through as-is.
one_hot_encoder = make_column_transformer(
    (_dense_encoder, make_column_selector(dtype_include="object")),
    remainder="passthrough",
)
|
|
| |
# Chain the column transformer with a gradient-boosting regressor (seeded for
# reproducibility) and fit the whole pipeline on the features and target.
regressor = GradientBoostingRegressor(random_state=42)
pipe = make_pipeline(one_hot_encoder, regressor)
pipe.fit(df, target)
|
|
| |
# File name for the pickled pipeline and folder name of the local repository.
model_path = "example.pkl"
local_repo = "fish-model"

# Serialise the fitted pipeline to disk.
Path(model_path).write_bytes(pickle.dumps(pipe))
|
|
| |
# Initialise the local repository folder (`local_repo`) for the pickled model,
# declaring the task and passing the feature frame as example data.
# `sklearn.__version__` needs the top-level `import sklearn`; importing only
# sklearn submodules with `from sklearn.x import y` does not bind the name
# `sklearn`, and this line would raise NameError.
hub_utils.init(
    model=model_path,
    # Pin the scikit-learn version the pipeline was trained with.
    requirements=[f"scikit-learn={sklearn.__version__}"],
    dst=local_repo,
    task="tabular-regression",
    data=df,
)
|
|
| |
# Directory of the local repository; derived from `local_repo` so the config
# lookup and the README destination always refer to the same folder.
repo_dir = Path(local_repo)

# Create the model card for the fitted pipeline, taking its metadata from the
# configuration found in the local repository.
model_card = card.Card(pipe, metadata=card.metadata_from_config(repo_dir))

# Free-text content for the card.
limitations = "This model is intended for educational purposes."
model_description = "This is a GradientBoostingRegressor on a fish dataset."
model_card_authors = "Brenden Connors"

# NOTE(review): the keyword names below are passed to Card.add as written;
# confirm they match the section names expected by the installed skops version.
model_card.add(
    model_card_authors=model_card_authors,
    limitations=limitations,
    model_description=model_description,
)

# Licence recorded in the card metadata.
model_card.metadata.license = "mit"

# Write the card as the repository README.
model_card.save(repo_dir / "README.md")
|
|
| |
# Hub repository ids take the form "<namespace>/<repo_name>" (exactly one
# slash); an id with a third path segment is rejected as invalid.
repo_id = "scikit-learn/Fish-Weight"

# Upload the contents of the local repository, creating the remote repository
# if it does not exist yet.
hub_utils.push(
    repo_id=repo_id,
    source=local_repo,
    token=my_token,
    commit_message="Adding model files",
    create_remote=True,
)