| |
| import streamlit as st |
| import pandas as pd |
| import numpy as np |
| import matplotlib.pyplot as plt |
| from sklearn.ensemble import RandomForestClassifier |
| from sklearn.model_selection import train_test_split |
| from sklearn.metrics import accuracy_score |
|
|
| |
# Page header: app title followed by a one-line description.
st.title("Machine Learning Model Visualization")
st.write(
    "This application demonstrates random forest classification on the iris dataset"
)
|
|
| |
@st.cache_data
def load_data():
    """Load the iris dataset as a DataFrame with a trailing ``target`` column.

    The function is wrapped in ``st.cache_data`` so Streamlit can reuse the
    result instead of rebuilding the frame on every script rerun.

    Returns:
        A ``(frame, target_names)`` tuple: ``frame`` holds one column per
        entry of the dataset's ``feature_names`` plus a final ``target``
        column taken from the dataset's ``target``; ``target_names`` is the
        dataset's ``target_names`` attribute, returned unchanged.
    """
    # Imported locally, as in the original, so sklearn.datasets is only
    # pulled in when the data is actually loaded.
    from sklearn.datasets import load_iris

    dataset = load_iris()
    frame = pd.DataFrame(dataset.data, columns=dataset.feature_names)
    frame['target'] = dataset.target
    return frame, dataset.target_names
|
|
data, target_names = load_data()

# --- Dataset exploration: the raw table is shown only on request. ---
st.subheader("Dataset Exploration")
show_dataset = st.checkbox("Display dataset")
if show_dataset:
    st.dataframe(data)

# --- Feature selection ---
# Every column except the last one ('target', appended by load_data) is
# offered as a candidate feature; the first column is preselected.
st.subheader("Feature Selection")
feature_columns = data.columns[:-1]
features = st.multiselect(
    "Select features for model training",
    options=feature_columns,
    default=feature_columns[0],
)
|
|
# Model configuration, training and reporting. Everything here requires at
# least one selected feature; otherwise the user is asked to pick one.
if features:
    st.subheader("Model Parameters")
    n_estimators = st.slider("Number of trees", 1, 100, 10)
    max_depth = st.slider("Maximum tree depth", 1, 20, 5)

    # Training runs only on the rerun triggered by the button click.
    if st.button("Train Model"):
        X = data[features]
        y = data['target']
        # Fixed random_state keeps the 70/30 split (and the forest below)
        # reproducible between clicks with the same settings.
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.3, random_state=42
        )

        model = RandomForestClassifier(
            n_estimators=n_estimators,
            max_depth=max_depth,
            random_state=42,
        )
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        st.success(f"Model accuracy: {accuracy:.4f}")

        # The importance chart is drawn only when several features were
        # selected, so there is something to compare.
        if len(features) > 1:
            st.subheader("Feature Importance")
            fig, ax = plt.subplots()
            ax.bar(features, model.feature_importances_)
            # Rotate labels through the Axes object rather than
            # plt.xticks(), which acts on pyplot's implicit "current axes"
            # (global state shared across Streamlit sessions).
            ax.tick_params(axis="x", labelrotation=45)
            st.pyplot(fig)
            # pyplot keeps every figure it created until it is closed;
            # without this each click would leave one more figure alive.
            plt.close(fig)
else:
    st.warning("Please select at least one feature for model training")