| import streamlit as st |
| import pandas as pd |
| from sklearn.model_selection import train_test_split |
| from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier |
| from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression |
| from sklearn.svm import SVR, SVC |
| from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report |
| from sklearn.impute import SimpleImputer |
| from sklearn.datasets import load_iris, fetch_california_housing |
| import base64 |
|
|
| |
# Browser-tab and layout settings. This stays ahead of every other Streamlit
# call in the script.
# NOTE(review): older Streamlit releases reject set_page_config() once another
# Streamlit command has run -- confirm against the pinned version before moving it.
_PAGE_SETTINGS = {
    "page_title": "Machine Learning App",
    "page_icon": "https://i.imgur.com/C6lAamP.png",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)

# Logo image shown in the sidebar.
_SIDEBAR_LOGO_URL = "https://i.imgur.com/kpkwAUT.png"
st.sidebar.image(_SIDEBAR_LOGO_URL, use_column_width=True)
|
|
| |
# Wallpaper used when the caller does not supply its own background image.
DEFAULT_BACKGROUND_URL = (
    "https://c.wallhere.com/photos/91/50/"
    "mountain_top_black_dark_nature_monochrome_landscape_mountains-1296041.jpg!d"
)


def set_bg_hack_url(image_url=DEFAULT_BACKGROUND_URL):
    """Use a remote image as the background of the whole app.

    Emits a ``<style>`` block through ``st.markdown`` that sets the
    ``background`` of the ``.stApp`` element to ``image_url`` and scales it
    with ``background-size: cover``.

    Parameters
    ----------
    image_url : str, optional
        URL of the background image. The value is interpolated into raw CSS
        without escaping, so only pass trusted URLs. Defaults to
        ``DEFAULT_BACKGROUND_URL``.

    Returns
    -------
    None
        The function only renders markup; it has no return value.
    """
    # Doubled braces are literal CSS braces; the single-brace field is the URL.
    st.markdown(
        f"""
        <style>
        .stApp {{
            background: url("{image_url}");
            background-size: cover
        }}
        </style>
        """,
        unsafe_allow_html=True,
    )
|
|
| |
# Apply the background image, then the remaining custom CSS.
set_bg_hack_url()

# Sidebar gradient, a few text-colour overrides, and the fixed-position
# ".st-top-right" badge used by the "Created by" credit at the end of the script.
# Written as a plain string: nothing is interpolated, so the CSS braces need
# no escaping.
_THEME_CSS = """
<style>
.sidebar .sidebar-content {
    background-image: linear-gradient(#D5DBDB, #F2F3F4);
}
.st-cc {
    color: #566573;
}
.st-cq {
    color: #566573;
}
.st-c8 {
    color: #FFCDD2;
}
.st-top-right {
    position: fixed;
    top: 60px;
    right: 10px;
    font-size: 16px;
    color: #FF0000; /* Red text color */
    background-color: rgba(255, 255, 255, 0.8); /* White background color with some transparency */
    padding: 5px 10px; /* Add padding to the text */
    border-radius: 5px; /* Add border radius for rounded corners */
    font-family: Arial, sans-serif;
}
</style>
"""
st.markdown(_THEME_CSS, unsafe_allow_html=True)
|
|
|
|
| |
def _split_features_target(df):
    """Split ``df`` into features/target and an 80/20 train/test partition.

    Every column except the last is a feature; the last column is the target.
    The split uses ``test_size=0.2`` and ``random_state=42``.

    Raises
    ------
    ValueError
        If ``df`` has fewer than two columns, i.e. no feature column is left
        once the target has been taken.
    """
    n_columns = df.shape[1]
    if n_columns < 2:
        raise ValueError(
            "The dataset needs at least one feature column followed by a "
            f"target column; got {n_columns} column(s)."
        )
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    return X, y, X_train, X_test, y_train, y_test


def _show_data_summary(X, y, X_train, X_test):
    """Render the split shapes and the feature/target names."""
    st.markdown('**1.2. Data splits**')
    st.write('Training set')
    st.info(X_train.shape)
    st.write('Test set')
    st.info(X_test.shape)

    st.markdown('**1.3. Variable details**:')
    st.write('X variable')
    st.info(list(X.columns))
    st.write('Y variable')
    st.info(y.name)


def _show_regression_metrics(y_true, y_pred):
    """Render R^2 and mean squared error for one data split."""
    st.write('Coefficient of determination ($R^2$):')
    st.info(r2_score(y_true, y_pred))
    # The value shown is always the mean squared error, so label it as such.
    st.write('Error (MSE):')
    st.info(mean_squared_error(y_true, y_pred))


def _show_classification_metrics(y_true, y_pred):
    """Render the accuracy score for one data split."""
    st.write('Accuracy Score:')
    st.info(accuracy_score(y_true, y_pred))


def _fit_and_report(df, algorithm, hyperparameters, show_metrics):
    """Split ``df``, fit ``algorithm`` on the training part and render a report.

    ``show_metrics(y_true, y_pred)`` is called once for the training split and
    once for the test split, in that order.
    """
    # Validate and split before anything is rendered, so malformed input
    # fails without leaving a half-drawn report.
    X, y, X_train, X_test, y_train, y_test = _split_features_target(df)
    _show_data_summary(X, y, X_train, X_test)

    # Treat a missing hyperparameter mapping as "use the estimator defaults".
    model = algorithm(**(hyperparameters or {}))
    model.fit(X_train, y_train)

    st.subheader('2. Model Performance')
    for heading, features, target in (
        ('**2.1. Training set**', X_train, y_train),
        ('**2.2. Test set**', X_test, y_test),
    ):
        st.markdown(heading)
        show_metrics(target, model.predict(features))

    st.subheader('3. Model Parameters')
    st.write(model.get_params())

    return model


def build_regression_model(df, algorithm, hyperparameters):
    """Fit a regressor on ``df`` and render its report in the Streamlit page.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset whose last column is the target and whose other columns are
        the features.
    algorithm : callable
        Estimator class (or factory); called as ``algorithm(**hyperparameters)``.
        The result must provide ``fit``, ``predict`` and ``get_params``.
    hyperparameters : dict or None
        Keyword arguments for ``algorithm``; ``None`` is treated as ``{}``.

    Returns
    -------
    object
        The fitted estimator.

    Raises
    ------
    ValueError
        If ``df`` has fewer than two columns.
    """
    return _fit_and_report(df, algorithm, hyperparameters, _show_regression_metrics)


def build_classification_model(df, algorithm, hyperparameters):
    """Fit a classifier on ``df`` and render its report in the Streamlit page.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataset whose last column is the class label and whose other columns
        are the features.
    algorithm : callable
        Estimator class (or factory); called as ``algorithm(**hyperparameters)``.
        The result must provide ``fit``, ``predict`` and ``get_params``.
    hyperparameters : dict or None
        Keyword arguments for ``algorithm``; ``None`` is treated as ``{}``.

    Returns
    -------
    object
        The fitted estimator.

    Raises
    ------
    ValueError
        If ``df`` has fewer than two columns.
    """
    return _fit_and_report(df, algorithm, hyperparameters, _show_classification_metrics)
|
|
|
|
| |
# Theme hint, sent as raw HTML so it can carry inline styling.
_DARK_MODE_HINT = (
    '<p style="color: green; font-family: monospace;">'
    'Disclaimer: This app works best when viewed in dark mode.<br>'
    ' Click three dots in the top right corner -> Settings -> Choose app theme dark.'
    '</p>'
)
st.markdown(_DARK_MODE_HINT, unsafe_allow_html=True)

# Page heading followed by a one-line usage hint.
_PAGE_INTRO = """
# Machine Learning App
Select the problem type, dataset, model, and hyperparameters.
"""
st.write(_PAGE_INTRO)
|
|
| |
# Sidebar step 1: which kind of problem to solve.
problem_type = st.sidebar.selectbox("Select Problem Type", ["Regression", "Classification"])

# Sidebar step 2: the example dataset offered depends on the problem type;
# uploading a CSV is available for both.
_EXAMPLE_DATASET = {
    "Classification": "Iris",
    "Regression": "California Housing",
}
st.sidebar.write(f"### Select Dataset or Upload CSV File for {problem_type}")
dataset_option = st.sidebar.selectbox(
    "Select Dataset",
    [_EXAMPLE_DATASET[problem_type], "Upload CSV File"],
)
|
|
| |
def _bundled_dataset_frame(bunch):
    """Return the bunch's features as a DataFrame with a trailing "target" column."""
    frame = pd.DataFrame(data=bunch.data, columns=bunch.feature_names)
    frame["target"] = bunch.target
    return frame


# `df` stays None until a dataset is actually available; the results section
# checks for that and asks the user to pick or upload one.
df = None
if dataset_option == "Iris":
    df = _bundled_dataset_frame(load_iris())
elif dataset_option == "California Housing":
    df = _bundled_dataset_frame(fetch_california_housing())
elif dataset_option == "Upload CSV File":
    uploaded_file = st.sidebar.file_uploader("Upload your CSV file", type=["csv"])
    if uploaded_file is not None:
        try:
            df = pd.read_csv(uploaded_file)
        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
            # An unreadable upload is a user error, not a crash: report it and
            # leave `df` as None so the rest of the page still renders.
            st.error(f"Could not read the uploaded CSV file: {exc}")
|
|
| |
st.sidebar.write("### Select Model and Hyperparameters")


# Each factory below draws the sidebar controls for one family of models and
# returns the chosen values as estimator keyword arguments. They are called
# only for the selected model, so only that model's widgets appear.
def _no_controls():
    """Return an empty mapping: the model has no tunable sidebar controls."""
    return {}


def _fixed_logistic_settings():
    """Return the fixed settings used for logistic regression (no widgets)."""
    return {"max_iter": 1000}


def _alpha_controls():
    """Draw the regularisation-strength slider."""
    return {"alpha": st.sidebar.slider("Alpha", 0.0, 1.0, 0.5, 0.01)}


def _kernel_controls():
    """Draw the SVC kernel selector."""
    return {"kernel": st.sidebar.selectbox("Kernel", ["linear", "poly", "rbf", "sigmoid"])}


def _forest_controls():
    """Draw the random-forest controls."""
    n_estimators = st.sidebar.slider("Number of Estimators", 1, 1000, 100)
    max_features = st.sidebar.selectbox("Max Features", ["sqrt", "log2"])
    return {"n_estimators": n_estimators, "max_features": max_features}


def _boosting_controls():
    """Draw the gradient-boosting controls."""
    n_estimators = st.sidebar.slider("Number of Estimators", 1, 1000, 100)
    learning_rate = st.sidebar.slider("Learning Rate", 0.01, 1.0, 0.1, 0.01)
    return {"n_estimators": n_estimators, "learning_rate": learning_rate}


# Display name -> (estimator class, control factory). Insertion order is the
# order of the options in the selectbox.
_REGRESSION_MODELS = {
    "Linear Regression": (LinearRegression, _no_controls),
    "Ridge Regression": (Ridge, _alpha_controls),
    "Lasso Regression": (Lasso, _alpha_controls),
    "Random Forest Regression": (RandomForestRegressor, _forest_controls),
    "Gradient Boosting Regression": (GradientBoostingRegressor, _boosting_controls),
}
_CLASSIFICATION_MODELS = {
    "Logistic Regression": (LogisticRegression, _fixed_logistic_settings),
    "Support Vector Classifier": (SVC, _kernel_controls),
    "Random Forest Classifier": (RandomForestClassifier, _forest_controls),
    "Gradient Boosting Classifier": (GradientBoostingClassifier, _boosting_controls),
}

if problem_type == "Regression":
    regression_algorithm = st.sidebar.selectbox(
        "Select Regression Algorithm",
        list(_REGRESSION_MODELS),
    )
    algorithm, _draw_controls = _REGRESSION_MODELS[regression_algorithm]
    hyperparameters = _draw_controls()
elif problem_type == "Classification":
    classification_algorithm = st.sidebar.selectbox(
        "Select Classification Algorithm",
        list(_CLASSIFICATION_MODELS),
    )
    algorithm, _draw_controls = _CLASSIFICATION_MODELS[classification_algorithm]
    hyperparameters = _draw_controls()
|
|
| |
st.write("## Results")

if df is None:
    st.info("Please upload a CSV file or select an example dataset.")
else:
    st.write("### Data Preview")
    st.write("First 20 rows of the dataset:")
    st.write(df.head(20))

    # One builder per problem type; both take (df, algorithm, hyperparameters).
    _MODEL_BUILDERS = {
        "Regression": build_regression_model,
        "Classification": build_classification_model,
    }
    _build_model = _MODEL_BUILDERS.get(problem_type)
    if _build_model is not None:
        try:
            model = _build_model(df, algorithm, hyperparameters)
        except ValueError as exc:
            # Data the estimator cannot use (e.g. an uploaded CSV it rejects)
            # is reported inline instead of aborting the script, so the rest
            # of the page, including the footer below, still renders.
            st.error(f"Could not train the model on this dataset: {exc}")
        else:
            st.write(f"### {problem_type} Model")
            st.write("Model:", algorithm.__name__)

# Author credit; positioned by the ".st-top-right" CSS class.
st.markdown('<p class="st-top-right">Created by - RAJDEEP CHAKRAVORTY</p>', unsafe_allow_html=True)