Spaces:

EnYa32
/

StarSystemClassification

Sleeping

App Files Files Community

EnYa32 committed on Dec 24, 2025

Commit

e78a19b

verified ·

1 Parent(s): 9869fe1

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +155 -37

src/streamlit_app.py CHANGED Viewed

@@ -1,40 +1,158 @@
-import altair as alt
-import numpy as np
 import pandas as pd
 import streamlit as st
-"""
-# Welcome to Streamlit!
-Edit `/streamlit_app.py` to customize this app to your heart's desire :heart:.
-If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
-forums](https://discuss.streamlit.io).
-In the meantime, below is an example of what you can do with just a few lines of code:
-"""
-num_points = st.slider("Number of points in spiral", 1, 10000, 1100)
-num_turns = st.slider("Number of turns in spiral", 1, 300, 31)
-indices = np.linspace(0, 1, num_points)
-theta = 2 * np.pi * num_turns * indices
-radius = indices
-x = radius * np.cos(theta)
-y = radius * np.sin(theta)
-df = pd.DataFrame({
-    "x": x,
-    "y": y,
-    "idx": indices,
-    "rand": np.random.randn(num_points),
-})
-st.altair_chart(alt.Chart(df, height=700, width=700)
-    .mark_point(filled=True)
-    .encode(
-        x=alt.X("x", axis=None),
-        y=alt.Y("y", axis=None),
-        color=alt.Color("idx", legend=None, scale=alt.Scale()),
-        size=alt.Size("rand", legend=None, scale=alt.Scale(range=[1, 150])),
-    ))

 import pandas as pd
 import streamlit as st
+import joblib
+from pathlib import Path
+st.set_page_config(page_title='Star System Classification (LightGBM)', page_icon='🪐', layout='centered')
+BASE_DIR = Path(__file__).resolve().parent
+MODEL_PATH = BASE_DIR / 'lightgbm_model.pkl'
+FEATURES_PATH = BASE_DIR / 'featurer.pkl'  # you saved it with this name
+PLANET_ENCODER_PATH = BASE_DIR / 'planet_encoder.pkl'
+STAR_ENCODER_PATH = BASE_DIR / 'star_encoder.pkl'
+# --- Fixed mapping you used in training ---
+ACTIVITY_MAP = {'Low': 0, 'Medium': 1, 'High': 2}
+# Optional: label names (edit if your competition uses different names)
+LABEL_NAMES = {
+    0: 'Habitable',
+    1: 'Young',
+    2: 'Old',
+    3: 'Exotic'
+}
+@st.cache_resource
+def load_artifacts():
+    missing = [p.name for p in [MODEL_PATH, FEATURES_PATH, PLANET_ENCODER_PATH, STAR_ENCODER_PATH] if not p.exists()]
+    if missing:
+        raise FileNotFoundError(
+            'Missing files in repo root: ' + ', '.join(missing) +
+            '\n\nMake sure these files are in the same folder as app.py:\n'
+            '- lightgbm_model.pkl\n- featurer.pkl\n- planet_encoder.pkl\n- star_encoder.pkl'
+        )
+    model = joblib.load(MODEL_PATH)
+    features = joblib.load(FEATURES_PATH)
+    le_planet = joblib.load(PLANET_ENCODER_PATH)
+    le_star = joblib.load(STAR_ENCODER_PATH)
+    return model, features, le_planet, le_star
+def safe_transform(le, value: str, col_name: str) -> int:
+    """Transform a single category value with a saved LabelEncoder.
+    If unseen value appears, show a helpful error."""
+    try:
+        return int(le.transform([value])[0])
+    except Exception:
+        known = list(getattr(le, 'classes_', []))
+        st.error(f'Unknown category for {col_name}: {value}. Known values: {known}')
+        st.stop()
+model, FEATURES, le_planet, le_star = load_artifacts()
+st.title('🪐 Star System Classification (LightGBM)')
+st.write('Predict the star system type using 10 astrophysical measurements (multiclass).')
+with st.expander('ℹ️ Required files in this folder', expanded=False):
+    st.code(
+        'app.py\n'
+        'lightgbm_model.pkl\n'
+        'featurer.pkl\n'
+        'planet_encoder.pkl\n'
+        'star_encoder.pkl\n'
+        'requirements.txt'
+    )
+st.subheader('Enter feature values')
+# --- Inputs ---
+# Numeric
+star_size = st.number_input('star_size', min_value=0.0, value=1.0, step=0.01)
+star_brightness = st.number_input('star_brightness', min_value=0.0, value=1.2, step=0.01)
+distance_from_earth = st.number_input('distance_from_earth', min_value=0.0, value=90.0, step=1.0)
+star_mass = st.number_input('star_mass', min_value=0.0, value=1.3, step=0.01)
+metallicity = st.number_input('metallicity', value=0.02, step=0.001, format='%.4f')
+# Discrete numeric / encoded-like
+galaxy_region = st.selectbox('galaxy_region', options=[0, 1, 2], index=1)
+galaxy_type = st.selectbox('galaxy_type', options=[0, 1, 2], index=0)
+# Categorical (original strings)
+star_spectral_class = st.selectbox(
+    'star_spectral_class',
+    options=list(le_star.classes_),
+    index=0
+)
+planet_configuration = st.selectbox(
+    'planet_configuration',
+    options=list(le_planet.classes_),
+    index=0
+)
+stellar_activity_class = st.selectbox(
+    'stellar_activity_class',
+    options=['Low', 'Medium', 'High'],
+    index=0
+)
+# --- Build row in the ORIGINAL feature space ---
+row = {
+    'star_size': float(star_size),
+    'star_brightness': float(star_brightness),
+    'galaxy_region': int(galaxy_region),
+    'distance_from_earth': float(distance_from_earth),
+    'galaxy_type': int(galaxy_type),
+    'star_spectral_class': star_spectral_class,
+    'planet_configuration': planet_configuration,
+    'stellar_activity_class': stellar_activity_class,
+    'star_mass': float(star_mass),
+    'metallicity': float(metallicity),
+}
+# --- Apply same preprocessing as training ---
+# Mapping for activity (ordinal)
+row['stellar_activity_class'] = ACTIVITY_MAP[row['stellar_activity_class']]
+# LabelEncoders for the other two categorical columns
+row['planet_configuration'] = safe_transform(le_planet, planet_configuration, 'planet_configuration')
+row['star_spectral_class'] = safe_transform(le_star, star_spectral_class, 'star_spectral_class')
+# Make DataFrame and enforce correct column order
+X_input = pd.DataFrame([row])
+# Ensure all expected feature columns exist
+missing_cols = [c for c in FEATURES if c not in X_input.columns]
+extra_cols = [c for c in X_input.columns if c not in FEATURES]
+if missing_cols:
+    st.error(f'Missing columns for model: {missing_cols}')
+    st.stop()
+if extra_cols:
+    # Not an error, but we will drop extras to be safe
+    X_input = X_input.drop(columns=extra_cols)
+X_input = X_input[FEATURES]
+st.divider()
+col1, col2 = st.columns(2)
+with col1:
+    if st.button('🔮 Predict', use_container_width=True):
+        pred = model.predict(X_input)[0]
+        pred_int = int(pred)
+        label = LABEL_NAMES.get(pred_int, str(pred_int))
+        st.success(f'Prediction: **{label}** (class {pred_int})')
+with col2:
+    if st.button('📊 Predict probabilities', use_container_width=True):
+        if hasattr(model, 'predict_proba'):
+            proba = model.predict_proba(X_input)[0]
+            proba_df = pd.DataFrame({'class': list(range(len(proba))), 'probability': proba}).sort_values('probability', ascending=False)
+            proba_df['label'] = proba_df['class'].map(LABEL_NAMES).fillna(proba_df['class'].astype(str))
+            st.dataframe(proba_df[['label', 'class', 'probability']], use_container_width=True)
+        else:
+            st.warning('This model does not support predict_proba().')
+st.caption('Tip: If predictions look wrong, ensure the same encoders and feature order are used as during training.')