Spaces:

simnid
/

Engine-Predictive-Maintenance

Sleeping

App Files Files Community

Engine-Predictive-Maintenance / app.py

simnid

Upload folder using huggingface_hub

053e43e verified 2 months ago

raw

history blame contribute delete

10 kB

	# Importing packages
	import streamlit as st
	import pandas as pd
	import numpy as np
	from huggingface_hub import hf_hub_download
	import joblib
	import io

	# App Configuration
	# Page title, icon and wide layout for the app.
	_page_settings = {
	    "page_title": "Engine Predictive Maintenance",
	    "page_icon": "🛠️",
	    "layout": "wide",
	}
	st.set_page_config(**_page_settings)

	st.title("🛠️ Smart Engine Predictive Maintenance App")

	# Introductory text shown under the title: what the app predicts, the
	# meaning of the two target classes, and why features are re-engineered.
	_intro_markdown = """
	This application predicts whether an engine is Faulty (maintenance required) or Normal
	based on sensor readings.

	Target:
	- 0 = Normal
	- 1 = Faulty

	Note: The model expects engineered features, so the app computes the same feature engineering
	used during training to ensure schema consistency.
	"""
	st.markdown(_intro_markdown)

	# Model Settings (Hugging Face)
	# Hub location of the serialized classifier.
	MODEL_REPO_ID = "simnid/predictive-maintenance-model"
	MODEL_FILENAME = "best_predictive_maintenance_model.joblib"

	# Hub dataset repo that hosts the optional bulk-prediction sample CSV.
	DATA_REPO_ID = "simnid/predictive-engine-maintenance-dataset"
	BULK_TEST_FILENAME = "bulk_test_sample.csv"

	# Raw sensor columns every input must provide (names are case-sensitive).
	RAW_COLS = [
	    "Engine rpm",
	    "Lub oil pressure",
	    "Fuel pressure",
	    "Coolant pressure",
	    "lub oil temp",
	    "Coolant temp",
	]

	# Columns derived from the raw sensors by add_engineered_features.
	ENGINEERED_COLS = [
	    "RPM_FuelPressure_Ratio",
	    "Power_Index",
	    "Thermal_Pressure_Index",
	    "Mech_Cooling_Balance",
	    "Pressure_Coordination",
	    "Low_Oil_Pressure_Flag",
	    "High_Coolant_Temp_Flag",
	    "Low_RPM_Flag",
	]

	# Column order handed to the model: raw sensors first, engineered features after.
	FINAL_FEATURE_ORDER = [*RAW_COLS, *ENGINEERED_COLS]

	# Feature Engineering
	def add_engineered_features(df: pd.DataFrame) -> pd.DataFrame:
	    """Validate the raw sensor columns and derive the engineered features.

	    The input frame is copied, so the caller's object is left untouched.

	    Args:
	        df: Frame containing at least every column listed in RAW_COLS.

	    Returns:
	        A new frame restricted to FINAL_FEATURE_ORDER, in that order.

	    Raises:
	        ValueError: If a RAW_COLS column is absent, or if any RAW_COLS value
	            is missing or cannot be converted to a number.
	    """
	    frame = df.copy()

	    # Every raw sensor column must be present before anything is computed.
	    absent = [name for name in RAW_COLS if name not in frame.columns]
	    if absent:
	        raise ValueError(f"Missing required columns: {absent}")

	    # Coerce to numeric; unparseable entries become NaN and are rejected below.
	    for name in RAW_COLS:
	        frame[name] = pd.to_numeric(frame[name], errors="coerce")

	    has_null = frame[RAW_COLS].isnull().any()
	    if has_null.any():
	        bad_cols = has_null[has_null].index.tolist()
	        raise ValueError(f"Non-numeric / missing values detected in: {bad_cols}")

	    rpm = frame["Engine rpm"]
	    oil_pressure = frame["Lub oil pressure"]
	    fuel_pressure = frame["Fuel pressure"]
	    coolant_pressure = frame["Coolant pressure"]
	    coolant_temp = frame["Coolant temp"]

	    # Small offset keeps the ratios finite when fuel pressure is exactly zero.
	    fuel_pressure_safe = fuel_pressure + 1e-5

	    # Interaction features
	    frame["RPM_FuelPressure_Ratio"] = rpm / fuel_pressure_safe
	    frame["Power_Index"] = (rpm * fuel_pressure) / 1000

	    # System stress indicators
	    frame["Thermal_Pressure_Index"] = coolant_temp / fuel_pressure_safe
	    mechanical_side = rpm + oil_pressure
	    cooling_side = coolant_temp + coolant_pressure
	    frame["Mech_Cooling_Balance"] = mechanical_side - cooling_side
	    frame["Pressure_Coordination"] = fuel_pressure - coolant_pressure

	    # Early-warning flags as 0/1 integers (thresholds were labelled
	    # "data-driven" by the original author).
	    frame["Low_Oil_Pressure_Flag"] = (oil_pressure < 1.5).astype(int)
	    frame["High_Coolant_Temp_Flag"] = (coolant_temp > 100).astype(int)
	    frame["Low_RPM_Flag"] = (rpm < 600).astype(int)

	    return frame[FINAL_FEATURE_ORDER]

	# Load Model
	@st.cache_resource
	def _fetch_model():
	    """Download the model artifact from the Hugging Face Hub and deserialize it.

	    Errors are deliberately not handled here: the cache stores whatever this
	    function returns, so returning None on failure would pin the failure
	    until the process restarts. Letting the exception propagate means the
	    next run retries the download.
	    """
	    model_path = hf_hub_download(
	        repo_id=MODEL_REPO_ID,
	        filename=MODEL_FILENAME,
	        repo_type="model",
	    )
	    # NOTE(review): joblib.load unpickles the artifact, which can execute
	    # arbitrary code. This is only safe while MODEL_REPO_ID is trusted;
	    # consider pinning a specific revision.
	    return joblib.load(model_path)


	def load_model():
	    """Return the trained model, or None when it cannot be loaded.

	    A successful load is cached by _fetch_model; a failure is reported in the
	    UI and is not cached, so a transient Hub error does not stick.
	    """
	    try:
	        return _fetch_model()
	    except Exception as e:
	        st.error(f"Error loading model from Hugging Face: {e}")
	        return None


	model = load_model()
	if model is None:
	    # Nothing below can work without a model, so end this script run here.
	    st.warning("Model could not be loaded. Please verify model repo + filename.")
	    st.stop()


	# Sidebar: Business + Model Context
	with st.sidebar:
	    # Static background on the model and why recall is the priority.
	    st.header("About This Model")
	    st.markdown("""
	Model Details
	- Model Type: Gradient Boosting Classifier
	- Optimization Objective: Maximize recall for faulty engines (minimize missed failures)
	- Artifact Source: Hugging Face Model Hub

	Why Recall Matters
	A false negative means a failure was missed, leading to downtime, safety risks, and costly repairs.
	""")

	    # Reference metrics are hard-coded display values, not computed here.
	    st.subheader("Production Metrics (Reference)")
	    for _metric_label, _metric_value in (
	        ("Recall (Faulty)", "0.84"),
	        ("ROC-AUC", "0.70"),
	        ("PR-AUC", "0.80"),
	    ):
	        st.metric(_metric_label, _metric_value)

	    st.markdown("---")

	    # User-adjustable cut-off; both prediction tabs compare the faulty
	    # probability against this value.
	    st.subheader("Decision Threshold")
	    threshold = st.slider(
	        "Classification Threshold (Faulty if P ≥ threshold)",
	        min_value=0.05,
	        max_value=0.95,
	        value=0.50,
	        step=0.01,
	    )
	    st.caption("Lower threshold → higher recall (fewer missed failures), but more false alarms.")


	# Tabs: Single + Bulk Prediction
	# tab1 is entered below for the single-record form, tab2 for the CSV batch flow.
	tab1, tab2 = st.tabs(["🔎 Single Prediction", "📦 Bulk Prediction"])


	# Single Prediction
	with tab1:
	    st.subheader("Engine Sensor Inputs")

	    # Six sensor inputs laid out two per column.
	    left_col, middle_col, right_col = st.columns(3)

	    with left_col:
	        engine_rpm = st.number_input("Engine rpm", min_value=0.0, value=700.0, step=1.0)
	        lub_oil_pressure = st.number_input("Lub oil pressure", min_value=0.0, value=2.50, step=0.01)

	    with middle_col:
	        fuel_pressure = st.number_input("Fuel pressure", min_value=0.0, value=12.00, step=0.01)
	        coolant_pressure = st.number_input("Coolant pressure", min_value=0.0, value=2.50, step=0.01)

	    with right_col:
	        lub_oil_temp = st.number_input("lub oil temp", min_value=0.0, value=80.0, step=0.1)
	        coolant_temp = st.number_input("Coolant temp", min_value=0.0, value=85.0, step=0.1)

	    # One-row frame keyed by the raw column names; the tuple below follows
	    # the order of RAW_COLS.
	    sensor_values = (
	        engine_rpm,
	        lub_oil_pressure,
	        fuel_pressure,
	        coolant_pressure,
	        lub_oil_temp,
	        coolant_temp,
	    )
	    raw_input_df = pd.DataFrame([dict(zip(RAW_COLS, sensor_values))])

	    try:
	        feature_df = add_engineered_features(raw_input_df)
	    except Exception as e:
	        st.error(f"Feature engineering failed: {e}")
	        st.stop()

	    # Lets the user inspect and download exactly what is fed to the model.
	    with st.expander("View engineered input dataframe"):
	        st.dataframe(feature_df)
	        csv = feature_df.to_csv(index=False).encode("utf-8")
	        st.download_button("Download Engineered Input CSV", csv, "engine_input_features.csv", "text/csv")

	    st.subheader("Prediction Output")

	    if st.button("Predict Engine Condition", type="primary", use_container_width=True):
	        try:
	            if hasattr(model, "predict_proba"):
	                # Probability of class 1, classified against the sidebar threshold.
	                proba_faulty = float(model.predict_proba(feature_df)[0][1])
	                pred_class = int(proba_faulty >= threshold)
	            else:
	                # No probabilities available: fall back to the model's own label.
	                proba_faulty = None
	                pred_class = int(model.predict(feature_df)[0])

	            verdict_col, score_col = st.columns(2)

	            with verdict_col:
	                if pred_class == 1:
	                    st.error("⚠️ Prediction: FAULTY (Maintenance Recommended)")
	                else:
	                    st.success("✅ Prediction: NORMAL (No Immediate Maintenance Required)")

	            with score_col:
	                if proba_faulty is None:
	                    st.info("Probability score unavailable (model does not support predict_proba).")
	                else:
	                    st.metric("Probability of Faulty (Class 1)", f"{proba_faulty*100:.1f}%")
	                    st.progress(int(proba_faulty * 100))

	        except Exception as e:
	            st.error(f"Prediction failed: {e}")

	# Bulk Prediction
	with tab2:
	    st.subheader("Bulk CSV Prediction")

	    st.markdown("""
	Upload a CSV containing raw sensor columns only:

	- Engine rpm
	- Lub oil pressure
	- Fuel pressure
	- Coolant pressure
	- lub oil temp
	- Coolant temp

	The app will automatically engineer features and return:
	- `Predicted_Class` (0/1)
	- `Faulty_Probability` (if available)
	""")

	    # The sample is a DataFrame, so it is cached with st.cache_data rather
	    # than st.cache_resource. Only the raising helper is cached, so a failed
	    # download is retried on the next run instead of being stored as None.
	    @st.cache_data
	    def _fetch_bulk_sample():
	        """Download the sample CSV from the dataset repo and parse it."""
	        path = hf_hub_download(
	            repo_id=DATA_REPO_ID,
	            filename=BULK_TEST_FILENAME,
	            repo_type="dataset",
	        )
	        return pd.read_csv(path)

	    def load_bulk_sample():
	        """Return the sample DataFrame, or None when it is unavailable.

	        The sample is an optional convenience, so any failure is treated as
	        "no sample" and the upload path keeps working.
	        """
	        try:
	            return _fetch_bulk_sample()
	        except Exception:
	            return None

	    sample_df = load_bulk_sample()
	    if sample_df is not None:
	        with st.expander("Preview bulk sample from Hugging Face"):
	            st.dataframe(sample_df.head())

	    uploaded_file = st.file_uploader("Upload CSV for bulk prediction", type=["csv"])

	    # An uploaded file takes precedence; otherwise fall back to the sample.
	    bulk_df = None
	    if uploaded_file is not None:
	        try:
	            bulk_df = pd.read_csv(uploaded_file)
	        except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
	            # Empty, malformed or wrongly-encoded uploads are reported
	            # instead of crashing the script with a traceback.
	            st.error(f"Could not read uploaded CSV: {e}")
	    elif sample_df is not None:
	        bulk_df = sample_df.copy()

	    if bulk_df is not None:
	        st.markdown("✅ Bulk data loaded.")
	        st.dataframe(bulk_df.head())

	        if st.button("Run Bulk Prediction", use_container_width=True):
	            try:
	                # Ensure required columns exist
	                missing = [c for c in RAW_COLS if c not in bulk_df.columns]
	                if missing:
	                    st.error(f"Missing required columns: {missing}")
	                    st.stop()

	                bulk_features = add_engineered_features(bulk_df[RAW_COLS])

	                if hasattr(model, "predict_proba"):
	                    # Classify against the sidebar threshold; the model's own
	                    # predict() output would be discarded, so it is not run.
	                    probs = model.predict_proba(bulk_features)[:, 1]
	                    preds = (probs >= threshold).astype(int)
	                else:
	                    # No probabilities: use the model's labels and report NaN.
	                    probs = np.full(shape=(len(bulk_features),), fill_value=np.nan)
	                    preds = model.predict(bulk_features).astype(int)

	                # Append results to a copy so every uploaded column is kept.
	                out = bulk_df.copy()
	                out["Predicted_Class"] = preds
	                out["Faulty_Probability"] = probs

	                st.success("Bulk predictions completed.")
	                st.dataframe(out.head(50))

	                out_csv = out.to_csv(index=False).encode("utf-8")
	                st.download_button(
	                    "Download Bulk Predictions CSV",
	                    out_csv,
	                    "bulk_engine_predictions.csv",
	                    "text/csv",
	                )

	            except Exception as e:
	                st.error(f"Bulk prediction failed: {e}")


	# Footer
	# Horizontal rule followed by a one-line credit caption.
	st.markdown("---")
	# The backslash is doubled so the literal still contains backslash + pipe
	# without relying on an unrecognized escape sequence.
	st.caption("Predictive Maintenance \\| Gradient Boosting + Streamlit + Hugging Face Model Hub")