| import gradio as gr |
| import numpy as np |
| import matplotlib |
| matplotlib.use("Agg") |
| import matplotlib.pyplot as plt |
| from sklearn.datasets import fetch_openml |
| from sklearn.utils import shuffle |
| from sklearn.ensemble import StackingRegressor |
| from sklearn.linear_model import RidgeCV |
| from skops.hub_utils import download |
| import joblib |
| import shutil |
|
|
| |
def load_ames_housing():
    """Fetch the OpenML ``house_prices`` dataset and return a small training subset.

    Only the 20 columns listed below are kept. The rows are shuffled with a
    fixed seed (``random_state=0``) and the first 600 shuffled rows are used.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is the feature frame restricted to the selected
        columns and ``y`` is the natural logarithm of the dataset target.
    """
    selected_columns = [
        "YrSold",
        "HeatingQC",
        "Street",
        "YearRemodAdd",
        "Heating",
        "MasVnrType",
        "BsmtUnfSF",
        "Foundation",
        "MasVnrArea",
        "MSSubClass",
        "ExterQual",
        "Condition2",
        "GarageCars",
        "GarageType",
        "OverallQual",
        "TotalBsmtSF",
        "BsmtFinSF1",
        "HouseStyle",
        "MiscFeature",
        "MoSold",
    ]
    n_rows = 600

    dataset = fetch_openml(name="house_prices", as_frame=True, parser="pandas")

    # Shuffle features and target together so the rows stay aligned.
    data, target = shuffle(
        dataset.data.loc[:, selected_columns],
        dataset.target,
        random_state=0,
    )

    return data.iloc[:n_rows], np.log(target.iloc[:n_rows])
|
|
def _load_pipeline(repo_id):
    """Download ``repo_id`` with skops and return the object stored in its ``model.pkl``."""
    import os
    import tempfile

    # Use a private scratch directory per call. A fixed "temp_dir" in the
    # working directory is shared between concurrent requests and is left
    # behind when download/loading raises, which can break later calls.
    with tempfile.TemporaryDirectory() as scratch:
        # Hand download() a path that does not exist yet, as the original
        # flow did (its directory was removed after every model).
        dst = os.path.join(scratch, "model")
        download(repo_id=repo_id, dst=dst)
        # SECURITY(review): joblib.load unpickles the file, which can execute
        # arbitrary code. The repo id comes straight from a user text box, so
        # only expose this app where the entered repositories are trusted.
        return joblib.load(os.path.join(dst, "model.pkl"))


def stacked_model(model1, model2, model3):
    """Stack three downloaded models and plot each one against the stack.

    Parameters
    ----------
    model1, model2, model3 : str
        Repo ids of the models to download. Each repository is expected to
        contain a ``model.pkl`` file loadable with ``joblib``.

    Returns
    -------
    matplotlib.figure.Figure
        A 2x2 grid of actual-vs-predicted plots: one per downloaded model and
        one for the ``StackingRegressor`` that combines them with ``RidgeCV``
        as final estimator. Each panel shows the cross-validated R2 and MAE
        (mean +- std) and the time taken by ``cross_validate``.
    """
    import time

    from sklearn.metrics import PredictionErrorDisplay
    from sklearn.model_selection import cross_val_predict, cross_validate

    X, y = load_ames_housing()

    estimators = []
    used_names = set()
    for position, repo_id in enumerate((model1, model2, model3), start=1):
        repo_id = repo_id.strip()  # tolerate stray whitespace from the text box
        est_name = repo_id.split("/")[-1]
        # StackingRegressor requires unique estimator names; two repos may
        # share the same last path component (or be the same repo twice).
        if est_name in used_names:
            est_name = f"{est_name}-{position}"
        used_names.add(est_name)
        estimators.append((est_name, _load_pipeline(repo_id)))

    stacking_regressor = StackingRegressor(
        estimators=estimators, final_estimator=RidgeCV()
    )

    # Display label -> scikit-learn scoring name; the same for every panel.
    scorers = {"R2": "r2", "MAE": "neg_mean_absolute_error"}

    fig, axs = plt.subplots(2, 2, figsize=(9, 7))
    try:
        panels = zip(
            np.ravel(axs), estimators + [("Stacking Regressor", stacking_regressor)]
        )
        for ax, (name, est) in panels:
            # Only the cross_validate call is timed, not the prediction pass.
            start_time = time.time()
            cv_results = cross_validate(
                est, X, y, scoring=list(scorers.values()), n_jobs=-1, verbose=0
            )
            elapsed_time = time.time() - start_time

            y_pred = cross_val_predict(est, X, y, n_jobs=-1, verbose=0)

            # np.abs turns the negated MAE reported by scikit-learn positive.
            summary = {
                label: (
                    f"{np.abs(np.mean(cv_results[f'test_{scoring}'])):.2f} +- "
                    f"{np.std(cv_results[f'test_{scoring}']):.2f}"
                )
                for label, scoring in scorers.items()
            }

            PredictionErrorDisplay.from_predictions(
                y_true=y,
                y_pred=y_pred,
                kind="actual_vs_predicted",
                ax=ax,
                scatter_kwargs={"alpha": 0.2, "color": "tab:blue"},
                line_kwargs={"color": "tab:red"},
            )
            ax.set_title(f"{name}\nEvaluation in {elapsed_time:.2f} seconds")

            # Empty, invisible plots are used purely to get legend entries
            # that display the scores.
            for label, text in summary.items():
                ax.plot([], [], " ", label=f"{label}: {text}")
            ax.legend(loc="upper left")

        fig.suptitle("Single predictor versus stacked predictors")
        fig.tight_layout()
        fig.subplots_adjust(top=0.9)
    finally:
        # Unregister the figure from pyplot so repeated clicks do not pile up
        # open figures; the Figure object itself can still be rendered.
        plt.close(fig)
    return fig
|
|
title = "Combine predictors using stacking"

# Introductory text rendered under the page heading.
_description = """
This app demonstrates combining 3 predictors trained on Ames housing dataset from OpenML using stacking and Ridge estimator as final estimator.
Stacking uses a meta-learning algorithm to learn how to combine the predictions from trained models.
The OpenML Ames housing dataset is a processed version of the 'Ames Iowa Housing' with 81 features.
This app is developed based on [scikit-learn example](https://scikit-learn.org/stable/auto_examples/ensemble/plot_stack_predictors.html#sphx-glr-auto-examples-ensemble-plot-stack-predictors-py)
"""

# (label, default repo id) for the three model inputs, in display order.
_model_fields = [
    ("Repo id of first model", "haizad/ames-housing-random-forest-predictor"),
    ("Repo id of second model", "haizad/ames-housing-gbdt-predictor"),
    ("Repo id of third model", "haizad/ames-housing-lasso-predictor"),
]

with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(_description)

    # Components are created in the order they appear on the page.
    model1, model2, model3 = [
        gr.Textbox(label=field_label, value=default_repo)
        for field_label, default_repo in _model_fields
    ]
    plot = gr.Plot(label="Comparison of single predictor against stacked predictor")
    stack_btn = gr.Button("Stack")

    # Clicking the button runs stacked_model on the three repo ids and shows
    # the returned figure in the plot component.
    stack_btn.click(
        fn=stacked_model,
        inputs=[model1, model2, model3],
        outputs=[plot],
    )


demo.launch()
|
|
|
|