"""Streamlit app that predicts body density with a pre-trained Ridge model.

The app loads ``ridge_model.pkl`` from the folder containing this file and
offers two tabs:

* Single Prediction - a form with one numeric input per model feature.
  Height is typed in centimetres and converted to inches before prediction.
* Batch Prediction (CSV) - upload a CSV, get one prediction per row and
  download the result.

Two engineered features are derived on the fly when the model expects them:
``Waist_hip`` (Abdomen / Hip) and ``Body_Index`` (703 * Weight / Height^2).
"""

from pathlib import Path

import joblib
import numpy as np
import pandas as pd
import streamlit as st

# Non-ASCII glyphs are written as escape sequences so the file stays pure
# ASCII and the icons cannot be corrupted by a wrong-encoding round trip.
PERSON_ICON = '\U0001F9CD\u200D\u2642\uFE0F'  # "man standing" emoji
INFO_ICON = '\u2139\uFE0F'  # "information" emoji
SEARCH_ICON = '\U0001F50E'  # "magnifying glass" emoji

# Must be the first Streamlit command executed by the script.
st.set_page_config(
    page_title='Body Density Predictor (Ridge)',
    page_icon=PERSON_ICON,
    layout='centered',
)

BASE_DIR = Path(__file__).resolve().parent
MODEL_PATH = BASE_DIR / 'ridge_model.pkl'

# Engineered feature -> raw columns it is computed from.  The LAST source is
# the denominator of the formula and therefore must be non-zero.
ENGINEERED_SOURCES = {
    'Waist_hip': ('Abdomen', 'Hip'),
    'Body_Index': ('Weight', 'Height'),
}


@st.cache_resource
def load_model():
    """Load the serialized model from ``MODEL_PATH`` (cached by Streamlit).

    Returns:
        The object stored in ``ridge_model.pkl``.

    Raises:
        FileNotFoundError: If the model file is not next to this script.
    """
    if not MODEL_PATH.exists():
        raise FileNotFoundError(
            f'Model not found: {MODEL_PATH.name}. Put it in the repo root (same folder as app.py).'
        )
    # NOTE(security): joblib.load unpickles the file and can therefore execute
    # arbitrary code.  Only ship a model file that comes from a trusted source.
    return joblib.load(MODEL_PATH)


model = load_model()


def get_feature_names(m):
    """Return the input column names the model ``m`` expects.

    Uses ``m.feature_names_in_`` when present.  Otherwise, if only
    ``m.n_features_in_`` is available, placeholder names ``feature_0``,
    ``feature_1``, ... are generated.  Returns an empty list when neither
    attribute exists.
    """
    if hasattr(m, 'feature_names_in_'):
        return list(m.feature_names_in_)
    if hasattr(m, 'n_features_in_'):
        return [f'feature_{i}' for i in range(int(m.n_features_in_))]
    return []


FEATURES = get_feature_names(model)


def _set_engineered(df: pd.DataFrame, target: str, compute) -> None:
    """Fill ``df[target]`` in place from its source columns.

    * All sources present and the denominator non-zero in every row:
      ``df[target] = compute(df)``.
    * Sources present but a denominator is zero: the column is set to NaN.
    * Sources absent and ``target`` absent: the column is created as NaN.
    * Sources absent but ``target`` already supplied: the supplied values are
      kept, so data that already carries the engineered column is not wiped.
    """
    sources = ENGINEERED_SOURCES[target]
    denominator = sources[-1]
    sources_present = all(name in df.columns for name in sources)

    if sources_present and (df[denominator] != 0).all():
        df[target] = compute(df)
    elif sources_present or target not in df.columns:
        df[target] = np.nan


def add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
    """
    Adds engineered features ONLY if the model expects them:
    - Waist_hip = Abdomen / Hip
    - Body_Index (BMI in lbs/inches) = 703 * Weight / Height^2

    The frame is modified in place and also returned.  A feature that cannot
    be computed is set to NaN, except when its raw source columns are absent
    and the frame already contains the engineered column; in that case the
    existing values are left untouched.
    """
    if 'Waist_hip' in FEATURES:
        _set_engineered(df, 'Waist_hip', lambda d: d['Abdomen'] / d['Hip'])
    if 'Body_Index' in FEATURES:
        _set_engineered(
            df, 'Body_Index', lambda d: 703.0 * d['Weight'] / (d['Height'] ** 2)
        )
    return df


def align_to_model(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame with exactly the columns in ``FEATURES``, in order.

    Columns the model expects but ``df`` lacks are added filled with NaN;
    extra columns are dropped.  ``df`` itself is not modified.
    """
    return df.reindex(columns=FEATURES)


def predict_density(df_features: pd.DataFrame) -> float:
    """Predict on ``df_features`` and return the first row's result as float."""
    return float(model.predict(df_features)[0])


def density_in_human_range(density: float) -> bool:
    """Return True when ``density`` lies in the inclusive range [0.95, 1.10]."""
    # typical human body density range ~[0.95, 1.10]
    return 0.95 <= density <= 1.10


def _missing_csv_columns(columns) -> list:
    """List columns a CSV must still provide so every feature is available.

    A plain feature is missing when it is not in ``columns``.  An engineered
    feature is satisfied either by its own column or by all of its source
    columns; when neither is the case its absent sources are reported.
    """
    present = set(columns)
    missing = []
    for feature in FEATURES:
        if feature in present:
            continue
        sources = ENGINEERED_SOURCES.get(feature)
        if sources is None:
            missing.append(feature)
        else:
            missing.extend(name for name in sources if name not in present)
    # De-duplicate while keeping first-seen order.
    return list(dict.fromkeys(missing))


st.title(f'{PERSON_ICON} Body Density Predictor (Ridge Regression)')
st.caption('Predicts **Body Density (Density)** using a trained Ridge Regression model.')

with st.expander(f'{INFO_ICON} Units (important)', expanded=True):
    st.write(
        '- **Height** is entered in **cm** in this app and automatically converted to **inches** for the model\n'
        '- **Weight** is expected in **pounds (lbs)**\n'
        '- Other circumferences are in **inches**\n'
        '- Output: **Predicted Density**'
    )

with st.expander(f'{SEARCH_ICON} Debug: model expected columns', expanded=False):
    st.code(', '.join(FEATURES) if FEATURES else 'No feature names found.')

tab1, tab2 = st.tabs(['Single Prediction', 'Batch Prediction (CSV)'])

# Ranges for realistic inputs (inches): column -> (minimum, maximum, default)
RANGES = {
    'Neck': (10.0, 25.0, 15.0),
    'Chest': (28.0, 60.0, 40.0),
    'Abdomen': (20.0, 65.0, 34.0),
    'Hip': (25.0, 70.0, 38.0),
    'Thigh': (12.0, 40.0, 22.0),
    'Knee': (10.0, 30.0, 15.0),
    'Ankle': (6.0, 18.0, 9.0),
    'Biceps': (8.0, 25.0, 13.0),
    'Forearm': (7.0, 20.0, 11.0),
    'Wrist': (5.0, 12.0, 7.0),
}


def _measurement_input(col: str) -> float:
    """Render the number input for column ``col`` and return the model value.

    Age, Height and Weight (matched case-insensitively) and the columns listed
    in ``RANGES`` get bounded inputs; any other column gets an unbounded input.
    Height is typed in centimetres and returned in inches.
    """
    col_l = col.lower()
    if col_l == 'age':
        return st.number_input('Age', min_value=18.0, max_value=90.0, value=35.0, step=1.0)
    if col_l == 'height':
        height_cm = st.number_input(
            'Height (cm)', min_value=140.0, max_value=220.0, value=175.0, step=0.5
        )
        return float(height_cm) / 2.54
    if col_l == 'weight':
        return st.number_input(
            'Weight (lbs)', min_value=90.0, max_value=400.0, value=180.0, step=1.0
        )
    if col in RANGES:
        lo, hi, val = RANGES[col]
        return st.number_input(
            f'{col} (inches)',
            min_value=float(lo),
            max_value=float(hi),
            value=float(val),
            step=0.5,
        )
    return st.number_input(col, value=0.0, step=0.1)


def _render_single_tab() -> None:
    """Render the single-prediction form and, on submit, the prediction.

    Validation failures end this function with ``return`` rather than
    ``st.stop()``: ``st.stop()`` halts the whole script, which would leave the
    batch tab (rendered afterwards) empty for that run.
    """
    if not FEATURES:
        st.error("This model does not expose feature names. Re-train using a pandas DataFrame so 'feature_names_in_' is available.")
        # Without any feature list neither tab can work, so halt the script.
        st.stop()

    engineered = set(ENGINEERED_SOURCES)
    required_base = set()
    for feature, sources in ENGINEERED_SOURCES.items():
        if feature in FEATURES:
            required_base.update(sources)
    # Ask for every raw feature plus the raw inputs of expected engineered
    # features; the engineered features themselves are never typed in.
    user_cols = sorted((set(FEATURES) - engineered) | required_base)

    st.subheader('Enter measurements')
    with st.form('single_form'):
        values = {col: _measurement_input(col) for col in user_cols}
        submitted = st.form_submit_button('Predict')

    if not submitted:
        return

    df_raw = pd.DataFrame([values])
    numeric_cols = df_raw.select_dtypes(include='number').columns
    if (df_raw[numeric_cols] <= 0).any().any():
        st.error('Please enter values greater than 0 for all measurements.')
        return

    df_feat = align_to_model(add_engineered_features(df_raw.copy()))
    if df_feat.isna().any().any():
        missing_cols = df_feat.columns[df_feat.isna().any()].tolist()
        st.error('Missing/invalid inputs for: ' + ', '.join(missing_cols))
        return

    density = predict_density(df_feat)
    if not density_in_human_range(density):
        st.warning(
            'Predicted density is outside the typical human range (0.95\u20131.10). '
            'Please double-check your inputs/units (Height is in cm here, converted to inches internally).'
        )

    st.metric('Predicted Density', f'{density:.5f}')
    st.write('Model input used (aligned features):')
    st.dataframe(df_feat, use_container_width=True)


def _render_batch_tab() -> None:
    """Render the CSV upload, validate the file and show/download predictions.

    Missing columns and unusable cell values are reported as two different
    errors so the user knows whether to add a column or to fix its contents.
    """
    st.subheader('Upload a CSV with the required feature columns')
    st.write(
        "Tip: Your CSV should contain the same columns as the training features. "
        "In the CSV, **Height must be in inches** (or convert it before upload)."
    )

    uploaded = st.file_uploader('Upload CSV', type=['csv'])
    if uploaded is None:
        return

    try:
        raw = pd.read_csv(uploaded)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f'Could not read the uploaded CSV: {exc}')
        return

    if raw.empty:
        st.error('The uploaded CSV contains no data rows.')
        return

    missing_cols = _missing_csv_columns(raw.columns)
    if missing_cols:
        st.error('Your uploaded CSV is missing required columns: ' + ', '.join(missing_cols))
        st.write('Required columns (model expects):')
        st.code(', '.join(FEATURES))
        return

    # Coerce model-relevant columns to numbers so stray text becomes NaN and
    # is reported below instead of raising inside pandas or the model.
    relevant = set(FEATURES).union(*ENGINEERED_SOURCES.values())
    work = raw.copy()
    for col in work.columns:
        if col in relevant:
            work[col] = pd.to_numeric(work[col], errors='coerce')

    df = align_to_model(add_engineered_features(work))
    invalid_cols = df.columns[df.isna().any()].tolist()
    if invalid_cols:
        st.error(
            'Your uploaded CSV has empty, non-numeric or invalid values in: '
            + ', '.join(invalid_cols)
        )
        st.write('Required columns (model expects):')
        st.code(', '.join(FEATURES))
        return

    density_preds = model.predict(df).astype(float)

    out = raw.copy()
    out['Pred_Density'] = density_preds

    st.success('Predictions generated!')
    st.dataframe(out.head(50), use_container_width=True)

    csv_bytes = out.to_csv(index=False).encode('utf-8')
    st.download_button(
        'Download predictions CSV',
        data=csv_bytes,
        file_name='predictions.csv',
        mime='text/csv',
    )


# ---------- Single Prediction ----------
with tab1:
    _render_single_tab()

# ---------- Batch Prediction ----------
with tab2:
    _render_batch_tab()