Spaces:
Build error
Build error
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from sklearn.preprocessing import StandardScaler, LabelEncoder | |
| from sklearn.cluster import DBSCAN | |
| from sklearn.ensemble import RandomForestClassifier | |
| import matplotlib.pyplot as plt | |
| import seaborn as sns | |
# Read the animal dataset from disk
def load_data():
    """Return the contents of "Animal Dataset.csv" as a pandas DataFrame."""
    dataset_path = "Animal Dataset.csv"
    frame = pd.read_csv(dataset_path)
    return frame
# Convert single values and "low-high" ranges to float
def convert_range_to_float(val):
    """Convert a raw dataset cell to a float.

    Plain numbers are converted directly, including negative values and
    scientific notation such as "-5" or "1e-3". Strings of the form
    "low-high" are converted to the midpoint of the two bounds.

    Args:
        val: The raw cell value, typically a number or a string.

    Returns:
        The value as a float, or ``np.nan`` when it cannot be interpreted as
        a number or as a two-part numeric range.
    """
    # Try the plain conversion first so a leading minus sign or an exponent
    # ("-5", "1e-3") is not mistaken for a range separator.
    try:
        return float(val)
    except (TypeError, ValueError, OverflowError):
        pass  # Not a plain number; fall through to range parsing.

    if isinstance(val, str) and '-' in val:
        try:
            start, end = map(float, val.split('-'))
        except ValueError:
            # Non-numeric bounds, or not exactly two parts.
            return np.nan
        return (start + end) / 2
    return np.nan
# Page header and tab layout
st.title("🐾 Animal Diet Prediction and Lifespan Clustering")
st.markdown("#### Check animal diet and group them according to lifespan using Random Forest Classifier and DBSCAN")
# NOTE(review): the leading "π" in each tab label looks like a mis-decoded
# emoji; the original icons cannot be recovered from here — confirm and restore.
tabs = st.tabs(["π Supervised Learning", "π Unsupervised Learning"])

data = load_data()

# Numeric columns used by both tabs; the last entry (lifespan) is excluded
# from the model features further down via numeric_features[:-1].
numeric_features = ['Height (cm)', 'Weight (kg)', 'Average Speed (km/h)', 'Top Speed (km/h)', 'Gestation Period (days)', 'Offspring per Birth', 'Lifespan (years)']

# Clean the features: turn "low-high" ranges into midpoints and anything
# unparseable into NaN, then drop rows that are missing any feature.
for col in numeric_features:
    data[col] = data[col].apply(convert_range_to_float)
# .copy() makes the cleaned frame independent, so the column assignment below
# does not write to a view of the unfiltered data.
data = data.dropna(subset=numeric_features).copy()

# Encode the target for supervised learning. The rest of the script uses
# `label_encoder` and `Diet_encoded` unconditionally, so stop with a clear
# message instead of failing later with a NameError/KeyError.
if 'Diet' not in data.columns:
    st.error("The dataset has no 'Diet' column, so the diet classifier cannot be trained.")
    st.stop()
label_encoder = LabelEncoder()
data['Diet_encoded'] = label_encoder.fit_transform(data['Diet'])
# Tab 1: Supervised Learning
def _feature_slider(column, help_text):
    """Render an integer slider labelled ``column`` spanning that column's observed range."""
    # Floor/ceil instead of int() truncation so fractional extremes in the
    # data remain reachable with the slider.
    lower = int(np.floor(data[column].min()))
    upper = int(np.ceil(data[column].max()))
    return st.slider(column, min_value=lower, max_value=upper, help=help_text)


with tabs[0]:
    st.header("Supervised: Predict Animal Diet Type")

    # Sliders (each starts at the lower bound of its range)
    st.subheader("Adjust the animal's characteristics to predict its diet")
    height = _feature_slider("Height (cm)", "How tall is the animal?")
    weight = _feature_slider("Weight (kg)", "How much does the animal weigh?")
    speed = _feature_slider("Average Speed (km/h)", "How fast can the animal move?")
    top_speed = _feature_slider("Top Speed (km/h)", "What is the maximum speed the animal can reach?")
    gestation = _feature_slider("Gestation Period (days)", "How long is the animal's pregnancy?")
    offspring = _feature_slider("Offspring per Birth", "How many offspring does the animal give birth to at once?")

    # Every numeric feature except the last one (lifespan) is a model input.
    feature_columns = numeric_features[:-1]

    # Prepare input data for prediction (using the same features)
    input_data = pd.DataFrame({
        'Height (cm)': [height],
        'Weight (kg)': [weight],
        'Average Speed (km/h)': [speed],
        'Top Speed (km/h)': [top_speed],
        'Gestation Period (days)': [gestation],
        'Offspring per Birth': [offspring],
    })
    # Match the training column order explicitly rather than relying on the
    # dict literal above staying in sync with numeric_features.
    input_data = input_data[feature_columns]

    # Train a RandomForest Classifier for supervised learning. The script
    # reruns (and retrains) on every widget interaction, so the seed is fixed
    # to keep the prediction stable for identical slider values.
    X = data[feature_columns]
    y = data['Diet_encoded']
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X, y)
    diet_pred = clf.predict(input_data)

    # Show prediction result, mapped back from the encoded label to its name
    predicted_diet = label_encoder.inverse_transform(diet_pred)
    st.subheader(f"Predicted Diet: {predicted_diet[0]}")
# Tab 2: Unsupervised Learning
with tabs[1]:
    st.header("Unsupervised: Group Animals Based on Lifespan")
    st.write("In this part, we will group animals based on their lifespan. This helps to identify patterns in how different animals live and how long they survive.")

    # Select minimum and maximum Lifespan
    st.subheader("Choose the lifespan range to group animals")
    # Floor/ceil the observed extremes so fractional lifespans at either end
    # of the data stay selectable.
    lifespan_lo = int(np.floor(data['Lifespan (years)'].min()))
    lifespan_hi = int(np.ceil(data['Lifespan (years)'].max()))
    lifespan_min = st.slider("Lifespan Min (years)", min_value=lifespan_lo, max_value=lifespan_hi, value=lifespan_lo, help="The minimum lifespan of the animals you are interested in.")
    # Start the upper slider at the top of the range; a slider without an
    # explicit value starts at min_value, which would make the initial
    # selection cover only the shortest-lived animals.
    lifespan_max = st.slider("Lifespan Max (years)", min_value=lifespan_lo, max_value=lifespan_hi, value=lifespan_hi, help="The maximum lifespan of the animals you are interested in.")

    # Filter dataset by lifespan; copy so the 'Cluster' column added below is
    # written to an independent frame rather than a slice of `data`.
    in_range = (data['Lifespan (years)'] >= lifespan_min) & (data['Lifespan (years)'] <= lifespan_max)
    filtered_data = data[in_range].copy()

    # Check if there is data available after filtering (also covers min > max)
    if filtered_data.empty:
        st.warning("No animals found with the selected lifespan range. Please adjust the sliders.")
    else:
        # Standardise the features so no single unit dominates the distances.
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(filtered_data[numeric_features[:-1]])  # Don't use 'Lifespan (years)' in unsupervised learning

        # Run DBSCAN with fixed, hand-picked parameters (not the library defaults).
        db = DBSCAN(eps=1.5, min_samples=5)
        clusters = db.fit_predict(X_scaled)
        filtered_data['Cluster'] = clusters

        st.subheader("Clustered Animals by Lifespan")
        st.write("Animals are grouped based on their characteristics (except lifespan). The animals in the same cluster share similarities.")

        # Show the animals matching the selected lifespan range
        st.subheader("Animals in the Selected Lifespan Range")
        st.dataframe(filtered_data[['Animal', 'Lifespan (years)', 'Weight (kg)', 'Cluster']])

        # Scatter plot of lifespan against weight, coloured by cluster label
        st.subheader("Cluster Plot (Lifespan vs Weight)")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.scatterplot(x=filtered_data['Lifespan (years)'], y=filtered_data['Weight (kg)'], hue=clusters, palette="tab10", ax=ax)
        st.pyplot(fig)
        # pyplot keeps every figure it creates alive until closed; release it
        # so figures do not pile up across reruns.
        plt.close(fig)