# NOTE(review): removed pasted "Spaces: Runtime error" banner lines — hosting-UI residue, not source code.
| #!/usr/bin/env python3 | |
| import os | |
| import json | |
| import yaml | |
| from datetime import datetime | |
| from typing import Optional, Tuple, Union, Dict, List | |
| import gradio as gr | |
| import numpy as np | |
| import pandas as pd | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from plotly.subplots import make_subplots | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.metrics import ( | |
| average_precision_score, | |
| brier_score_loss, | |
| roc_auc_score, | |
| classification_report, | |
| confusion_matrix, | |
| ) | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.preprocessing import OneHotEncoder | |
# ---------------------------------------------------------------------------
# Configuration: filesystem layout for the bundled example data and outputs.
# ---------------------------------------------------------------------------
BASE_DIR = os.getcwd()
SCORES_DIR = "scores"
PRESET_FEATURES = os.path.join(BASE_DIR, "examples", "synthetic_v2", "leads_features.csv")
PRESET_OUTCOMES = os.path.join(BASE_DIR, "examples", "synthetic_v2", "outcomes.csv")
DATA_DICTIONARY_PATH = os.path.join(BASE_DIR, "data_dictionary.yaml")

# Columns one-hot encoded by the model pipeline (subset of the candidates).
CATEGORICAL = [
    "tdsp",
    "rate_structure",
    "credit_score_range",
    "household_income_bracket",
    "preferred_financing_type",
    "neighborhood_type",
]

# Every column the model will use if present in the uploaded features CSV;
# candidates not listed in CATEGORICAL are treated as numeric passthrough.
FEATURE_CANDIDATES = [
    "living_area_sqft",
    "average_monthly_kwh",
    "average_monthly_bill_usd",
    "shading_factor",
    "roof_suitability_score",
    "seasonality_index",
    "electric_panel_amperage",
    "has_pool",
    "is_remote_worker_household",
    "tdsp",
    "rate_structure",
    "credit_score_range",
    "household_income_bracket",
    "preferred_financing_type",
    "neighborhood_type",
]
# Load data dictionary
def load_data_dictionary() -> dict:
    """Load the YAML data dictionary used for field descriptions.

    Returns:
        Parsed mapping from DATA_DICTIONARY_PATH, or {} when the file is
        missing, unreadable, empty, or malformed — the dashboard degrades
        gracefully rather than failing at import time.
    """
    try:
        with open(DATA_DICTIONARY_PATH, 'r', encoding='utf-8') as f:
            # safe_load never executes arbitrary YAML tags; `or {}` guards
            # against an empty file, where safe_load returns None and would
            # break downstream DATA_DICT.get(...) calls.
            return yaml.safe_load(f) or {}
    except (OSError, yaml.YAMLError):
        # Previously a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; only file/parse errors should fall back here.
        return {}


# Loaded once at import time; consumers treat it as read-only.
DATA_DICT = load_data_dictionary()
def get_field_description(field_name: str) -> str:
    """Resolve a human-readable description for *field_name*.

    Scans every field group in the loaded data dictionary; prefers
    "label: description", then whichever of the two exists, and finally
    falls back to the raw field name when no entry matches.
    """
    for group in DATA_DICT.get('field_groups', []):
        for field in group.get('fields', []):
            if field.get('field_name') != field_name:
                continue
            label = field.get('label', '')
            desc = field.get('description', '')
            if label and desc:
                return f"{label}: {desc}"
            return label or desc or field_name
    return field_name
def load_sample_data() -> pd.DataFrame:
    """Return the preset features joined with outcomes on ``lead_id``.

    Any failure (missing files, bad CSV, absent join key) is reported to
    stdout and an empty frame is returned so the dashboard can still render.
    """
    try:
        leads = pd.read_csv(PRESET_FEATURES)
        results = pd.read_csv(PRESET_OUTCOMES)
        # Inner join keeps only leads that have a recorded outcome.
        return leads.merge(results, on='lead_id', how='inner')
    except Exception as exc:
        print(f"Error loading sample data: {exc}")
        return pd.DataFrame()
| # Utility functions | |
| def _safe_path(file_or_path: Optional[Union[str, gr.File]]) -> Optional[str]: | |
| """Convert a gradio File object or string path to a usable string path.""" | |
| if file_or_path is None: | |
| return None | |
| if isinstance(file_or_path, str): | |
| return file_or_path | |
| if hasattr(file_or_path, "name"): | |
| return file_or_path.name | |
| if isinstance(file_or_path, dict) and "name" in file_or_path: | |
| return file_or_path["name"] | |
| return None | |
| def _validate_inputs(df_features: pd.DataFrame, df_outcomes: pd.DataFrame) -> None: | |
| if "lead_id" not in df_features.columns: | |
| raise ValueError("Features CSV must contain a 'lead_id' column.") | |
| if "lead_id" not in df_outcomes.columns: | |
| raise ValueError("Outcomes CSV must contain a 'lead_id' column.") | |
| if "sold" not in df_outcomes.columns: | |
| raise ValueError("Outcomes CSV must contain a 'sold' column (0/1).") | |
def _compute_metrics(y_true: np.ndarray, y_prob: np.ndarray) -> Tuple[Optional[float], Optional[float], Optional[float]]:
    """Compute ROC AUC, PR AUC, and Brier score with graceful fallbacks.

    Each metric independently degrades to None when it cannot be computed
    (single-class target, or scikit-learn raising for any reason).

    Returns:
        (roc_auc, pr_auc, brier) — each a float or None.
    """
    def attempt(metric_fn) -> Optional[float]:
        # Shared wrapper replacing three near-identical try/except blocks.
        try:
            return float(metric_fn(y_true.astype(int), y_prob))
        except Exception:
            return None

    brier = attempt(brier_score_loss)
    try:
        # Ranking metrics are undefined when only one class is present.
        two_classes = len(np.unique(y_true.astype(int))) >= 2
    except Exception:
        two_classes = False
    auc = attempt(roc_auc_score) if two_classes else None
    pr_auc = attempt(average_precision_score) if two_classes else None
    return auc, pr_auc, brier
# Dashboard functions
def create_overview_dashboard():
    """Build the Overview tab content from the preset sample data.

    Returns:
        Tuple of (metrics_html, geo_fig, funnel_fig, score_fig). The first
        element is an error string when no data could be loaded; each figure
        slot is None when its source column is absent from the merged data.
    """
    df = load_sample_data()
    if df.empty:
        return "No data available", None, None, None
    # Key metrics
    total_leads = len(df)
    # Columns may be absent depending on the CSVs in use, so each KPI falls
    # back to 0 instead of raising KeyError.
    qualified_leads = df['qualified_opportunity'].sum() if 'qualified_opportunity' in df.columns else 0
    sold_leads = df['sold'].sum() if 'sold' in df.columns else 0
    conversion_rate = (sold_leads / total_leads * 100) if total_leads > 0 else 0
    # KPI cards rendered as inline-styled HTML (no external CSS).
    metrics_html = f"""
    <div style="display: flex; justify-content: space-around; margin: 20px 0;">
        <div style="text-align: center; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
            <h3 style="margin: 0; color: #2196F3;">{total_leads}</h3>
            <p style="margin: 5px 0;">Total Leads</p>
        </div>
        <div style="text-align: center; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
            <h3 style="margin: 0; color: #4CAF50;">{qualified_leads}</h3>
            <p style="margin: 5px 0;">Qualified Leads</p>
        </div>
        <div style="text-align: center; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
            <h3 style="margin: 0; color: #FF9800;">{sold_leads}</h3>
            <p style="margin: 5px 0;">Sold Leads</p>
        </div>
        <div style="text-align: center; padding: 20px; border: 1px solid #ddd; border-radius: 8px;">
            <h3 style="margin: 0; color: #9C27B0;">{conversion_rate:.1f}%</h3>
            <p style="margin: 5px 0;">Conversion Rate</p>
        </div>
    </div>
    """
    # Geographic distribution
    geo_fig = None
    if 'tdsp' in df.columns:
        tdsp_counts = df['tdsp'].value_counts()
        geo_fig = px.bar(
            x=tdsp_counts.index,
            y=tdsp_counts.values,
            title="Lead Distribution by TDSP (Texas Utility Territory)",
            labels={'x': 'TDSP', 'y': 'Number of Leads'}
        )
        geo_fig.update_layout(height=400)
    # Conversion funnel
    funnel_fig = None
    if 'qualified_opportunity' in df.columns and 'sold' in df.columns:
        funnel_data = {
            'Stage': ['Total Leads', 'Qualified', 'Sold'],
            'Count': [total_leads, qualified_leads, sold_leads]
        }
        funnel_fig = px.funnel(
            funnel_data,
            x='Count',
            y='Stage',
            title="Lead Conversion Funnel"
        )
        funnel_fig.update_layout(height=400)
    # Lead scoring distribution
    score_fig = None
    if 'probability_to_buy' in df.columns:
        score_fig = px.histogram(
            df,
            x='probability_to_buy',
            title="Distribution of Lead Scores (Probability to Buy)",
            nbins=20
        )
        score_fig.update_layout(height=400)
    return metrics_html, geo_fig, funnel_fig, score_fig
def search_leads(search_term: str, filter_tdsp: str, filter_sold: str):
    """Search and filter leads from the preset sample dataset.

    Args:
        search_term: Substring matched case-insensitively against lead_id
            and, when present, tdsp.
        filter_tdsp: A TDSP name, or "All" to disable the filter.
        filter_sold: "Sold", "Not Sold", or "All" to disable the filter.

    Returns:
        Up to 100 matching rows restricted to the key display columns,
        or an empty DataFrame when no data is available.
    """
    df = load_sample_data()
    if df.empty:
        return pd.DataFrame()
    # Apply filters
    filtered_df = df.copy()
    if search_term:
        # astype(str) guards against a numeric lead_id column, which has no
        # .str accessor and previously raised AttributeError here.
        mask = df['lead_id'].astype(str).str.contains(search_term, case=False, na=False)
        if 'tdsp' in df.columns:
            mask |= df['tdsp'].astype(str).str.contains(search_term, case=False, na=False)
        filtered_df = df[mask]
    # Column-presence guards: selecting a filter when the column is missing
    # previously raised KeyError instead of returning results.
    if filter_tdsp and filter_tdsp != "All" and 'tdsp' in filtered_df.columns:
        filtered_df = filtered_df[filtered_df['tdsp'] == filter_tdsp]
    if filter_sold and filter_sold != "All" and 'sold' in filtered_df.columns:
        sold_value = filter_sold == "Sold"
        filtered_df = filtered_df[filtered_df['sold'] == sold_value]
    # Select key columns for display
    display_cols = ['lead_id', 'tdsp', 'household_income_bracket', 'credit_score_range',
                    'living_area_sqft', 'average_monthly_kwh', 'probability_to_buy', 'sold']
    display_cols = [col for col in display_cols if col in filtered_df.columns]
    return filtered_df[display_cols].head(100)
def get_lead_details(lead_id: str):
    """Render an HTML profile card for a single lead.

    Returns an explanatory plain string when data is unavailable or the id
    does not match; otherwise an inline-styled HTML block. Missing columns
    fall back to 'N/A' via Series.get on the matched row.
    """
    df = load_sample_data()
    if df.empty or not lead_id:
        return "No data available"
    # Exact match on lead_id; assumes lead_id values are strings — TODO confirm.
    lead_data = df[df['lead_id'] == lead_id]
    if lead_data.empty:
        return f"Lead {lead_id} not found"
    lead = lead_data.iloc[0]
    # Create detailed lead profile
    details_html = f"""
    <div style="padding: 20px; border: 1px solid #ddd; border-radius: 8px; margin: 10px 0;">
        <h3>Lead Profile: {lead_id}</h3>
        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px;">
            <div>
                <h4>Demographics</h4>
                <p><strong>Age Bracket:</strong> {lead.get('age_bracket', 'N/A')}</p>
                <p><strong>Income:</strong> {lead.get('household_income_bracket', 'N/A')}</p>
                <p><strong>Credit Score:</strong> {lead.get('credit_score_range', 'N/A')}</p>
                <p><strong>Adults/Children:</strong> {lead.get('adults_count', 'N/A')}/{lead.get('children_count', 'N/A')}</p>
            </div>
            <div>
                <h4>Property</h4>
                <p><strong>Living Area:</strong> {lead.get('living_area_sqft', 'N/A')} sqft</p>
                <p><strong>Property Age:</strong> {lead.get('property_age_years', 'N/A')} years</p>
                <p><strong>Roof Material:</strong> {lead.get('roof_material', 'N/A')}</p>
                <p><strong>Shading Factor:</strong> {lead.get('shading_factor', 'N/A')}</p>
            </div>
        </div>
        <div style="display: grid; grid-template-columns: 1fr 1fr; gap: 20px; margin-top: 20px;">
            <div>
                <h4>Energy Usage</h4>
                <p><strong>Monthly kWh:</strong> {lead.get('average_monthly_kwh', 'N/A')}</p>
                <p><strong>Monthly Bill:</strong> ${lead.get('average_monthly_bill_usd', 'N/A')}</p>
                <p><strong>TDSP:</strong> {lead.get('tdsp', 'N/A')}</p>
                <p><strong>Rate Structure:</strong> {lead.get('rate_structure', 'N/A')}</p>
            </div>
            <div>
                <h4>Solar Potential</h4>
                <p><strong>Solar Potential:</strong> {lead.get('solar_potential_kwh_year', 'N/A')} kWh/year</p>
                <p><strong>Expected Savings:</strong> ${lead.get('expected_savings_usd_year', 'N/A')}/year</p>
                <p><strong>Payback Period:</strong> {lead.get('payback_years', 'N/A')} years</p>
                <p><strong>Probability to Buy:</strong> {lead.get('probability_to_buy', 'N/A')}</p>
            </div>
        </div>
        <div style="margin-top: 20px; padding: 15px; background-color: {'#e8f5e8' if lead.get('sold') else '#fff3cd'}; border-radius: 5px;">
            <h4>Status: {'SOLD' if lead.get('sold') else 'NOT SOLD'}</h4>
            {f"<p><strong>Win Reason:</strong> {lead.get('win_reason', 'N/A')}</p>" if lead.get('sold') else ''}
            {f"<p><strong>Cancel Reason:</strong> {lead.get('cancel_reason', 'N/A')}</p>" if lead.get('cancellation') else ''}
        </div>
    </div>
    """
    return details_html
def create_utility_analysis():
    """Build the Utility & Market Intelligence tab content.

    Returns:
        Tuple of (summary_html, tdsp_fig, rate_fig, solar_fig); the summary
        is an empty string and each figure None when its source columns are
        missing from the merged sample data.
    """
    df = load_sample_data()
    if df.empty:
        return "No data available", None, None, None
    # TDSP analysis
    tdsp_fig = None
    if 'tdsp' in df.columns and 'sold' in df.columns:
        # Per-territory lead count plus sold total and mean (conversion rate).
        tdsp_analysis = df.groupby('tdsp').agg({
            'lead_id': 'count',
            'sold': ['sum', 'mean']
        }).round(3)
        tdsp_analysis.columns = ['Total_Leads', 'Sold_Leads', 'Conversion_Rate']
        tdsp_analysis = tdsp_analysis.reset_index()
        tdsp_fig = px.bar(
            tdsp_analysis,
            x='tdsp',
            y=['Total_Leads', 'Sold_Leads'],
            title="Lead Volume and Sales by TDSP",
            barmode='group'
        )
        tdsp_fig.update_layout(height=400)
    # Rate structure analysis
    rate_fig = None
    if 'rate_structure' in df.columns and 'sold' in df.columns:
        rate_analysis = df.groupby('rate_structure').agg({
            'sold': 'mean',
            'lead_id': 'count'
        }).round(3)
        rate_analysis.columns = ['Conversion_Rate', 'Lead_Count']
        rate_analysis = rate_analysis.reset_index()
        rate_fig = px.scatter(
            rate_analysis,
            x='Lead_Count',
            y='Conversion_Rate',
            size='Lead_Count',
            color='rate_structure',
            title="Conversion Rate by Rate Structure",
            hover_data=['rate_structure']
        )
        rate_fig.update_layout(height=400)
    # Solar potential vs actual sales
    solar_fig = None
    if 'solar_potential_kwh_year' in df.columns and 'sold' in df.columns:
        solar_fig = px.box(
            df,
            x='sold',
            y='solar_potential_kwh_year',
            title="Solar Potential Distribution: Sold vs Not Sold",
            labels={'sold': 'Sold Status', 'solar_potential_kwh_year': 'Solar Potential (kWh/year)'}
        )
        solar_fig.update_layout(height=400)
    # Summary statistics
    summary_html = ""
    if 'tdsp' in df.columns:
        tdsp_stats = df['tdsp'].value_counts()
        summary_html = f"""
        <div style="padding: 20px;">
            <h3>Texas Utility Market Summary</h3>
            <p><strong>Top TDSP by Lead Volume:</strong></p>
            <ul>
                {''.join([f'<li>{tdsp}: {count} leads</li>' for tdsp, count in tdsp_stats.head(5).items()])}
            </ul>
        </div>
        """
    return summary_html, tdsp_fig, rate_fig, solar_fig
def train_and_score(
    mode: str,
    features_file: Optional[Union[str, gr.File]],
    outcomes_file: Optional[Union[str, gr.File]],
):
    """Train a logistic-regression lead scorer and score every lead.

    Args:
        mode: "Use example synthetic_v2" to use the bundled preset CSVs;
            anything else reads the two uploaded files.
        features_file: Uploaded features CSV (ignored in preset mode).
        outcomes_file: Uploaded outcomes CSV with columns [lead_id, sold].

    Returns:
        (metrics_markdown, predictions_preview_df, scored_preview_df,
        predictions_csv_path, scored_csv_path). On any failure the markdown
        carries the error message and the remaining slots are empty/None.
    """
    try:
        # --- Resolve input paths ------------------------------------------
        if mode == "Use example synthetic_v2":
            features_path = PRESET_FEATURES
            outcomes_path = PRESET_OUTCOMES
            if not os.path.exists(features_path) or not os.path.exists(outcomes_path):
                raise FileNotFoundError(
                    f"Preset files not found. Expected:\n- {PRESET_FEATURES}\n- {PRESET_OUTCOMES}"
                )
        else:
            f_path = _safe_path(features_file)
            o_path = _safe_path(outcomes_file)
            if not f_path or not o_path:
                raise ValueError("Please upload BOTH Features CSV and Outcomes CSV.")
            features_path = f_path
            outcomes_path = o_path
        if not os.path.exists(features_path):
            raise FileNotFoundError(f"Features file not found: {features_path}")
        if not os.path.exists(outcomes_path):
            raise FileNotFoundError(f"Outcomes file not found: {outcomes_path}")

        X = pd.read_csv(features_path)
        y_df = pd.read_csv(outcomes_path)[["lead_id", "sold"]]
        _validate_inputs(X, y_df)
        df = X.merge(y_df, on="lead_id", how="inner")

        # Select features present in this dataset
        available = [c for c in FEATURE_CANDIDATES if c in df.columns]
        if not available:
            raise ValueError(
                "No candidate features found in features CSV. "
                f"Expected any of: {', '.join(FEATURE_CANDIDATES)}"
            )
        numeric = [c for c in available if c not in CATEGORICAL]
        cat_cols = [c for c in available if c in CATEGORICAL]

        # --- Model pipeline: one-hot categoricals, passthrough numerics ---
        preproc = ColumnTransformer(
            transformers=[
                ("num", "passthrough", numeric),
                ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
            ],
            remainder="drop",
        )
        model = LogisticRegression(max_iter=1000)
        pipe = Pipeline(steps=[("pre", preproc), ("clf", model)])

        y = df["sold"].astype(int)
        # Stratifying a single-class target raises inside train_test_split;
        # compute the class-count check once and reuse it below.
        two_classes = len(np.unique(y)) >= 2
        train_df, test_df = train_test_split(
            df, test_size=0.25, random_state=42, stratify=y if two_classes else None
        )

        pipe.fit(train_df[available], train_df["sold"].astype(int))
        test_probs = pipe.predict_proba(test_df[available])[:, 1]
        test_preds = pipe.predict(test_df[available])
        auc, pr_auc, brier = _compute_metrics(test_df["sold"].values, test_probs)

        # Score all rows
        all_probs = pipe.predict_proba(df[available])[:, 1]
        preds = df[["lead_id"]].copy()
        preds["probability_to_buy"] = np.round(all_probs, 4)

        # Persist outputs
        os.makedirs(SCORES_DIR, exist_ok=True)
        ts = datetime.now().strftime("%Y%m%d_%H%M%S")
        predictions_path = os.path.join(SCORES_DIR, f"predictions_{ts}.csv")
        scored_path = os.path.join(SCORES_DIR, f"leads_features_scored_{ts}.csv")
        preds.to_csv(predictions_path, index=False)
        scored = X.merge(preds, on="lead_id", how="left")
        scored.to_csv(scored_path, index=False)

        def fmt(val: Optional[float]) -> str:
            # Format a metric for markdown, tolerating None.
            return f"{val:.3f}" if val is not None else "N/A"

        # Test-split diagnostics (only meaningful with both classes present).
        cm = confusion_matrix(test_df["sold"].values, test_preds) if two_classes else None
        class_report = classification_report(test_df["sold"].values, test_preds, output_dict=True) if two_classes else None

        metrics_md = (
            "### Enhanced Model Evaluation Metrics\n"
            f"**Dataset:** {len(df)} leads, {len(available)} features\n"
            f"**Train/Test Split:** {len(train_df)}/{len(test_df)} leads\n\n"
            "#### Performance Metrics\n"
            f"- **ROC AUC:** {fmt(auc)}\n"
            f"- **PR AUC:** {fmt(pr_auc)}\n"
            f"- **Brier Score:** {fmt(brier)}\n"
        )
        # .get guards a KeyError: the report has no '1' entry when the test
        # fold happens to contain no positive examples.
        sold_stats = class_report.get('1') if class_report else None
        if sold_stats:
            metrics_md += (
                f"- **Precision (Sold):** {fmt(sold_stats['precision'])}\n"
                f"- **Recall (Sold):** {fmt(sold_stats['recall'])}\n"
                f"- **F1-Score (Sold):** {fmt(sold_stats['f1-score'])}\n"
            )
        # Previously computed but never displayed; surface the confusion matrix.
        if cm is not None and cm.size == 4:
            tn, fp, fn, tp = (int(v) for v in cm.ravel())
            metrics_md += (
                "\n#### Confusion Matrix (test split)\n"
                f"- **TN:** {tn} | **FP:** {fp}\n"
                f"- **FN:** {fn} | **TP:** {tp}\n"
            )
        metrics_md += (
            f"\n#### Feature Importance\n"
            f"**Numeric Features:** {', '.join(numeric)}\n"
            f"**Categorical Features:** {', '.join(cat_cols)}\n\n"
            f"#### Output Files\n"
            f"- `{predictions_path}`\n"
            f"- `{scored_path}`\n"
        )
        preds_preview = preds.head(20)
        scored_preview = scored.head(20)
        return metrics_md, preds_preview, scored_preview, predictions_path, scored_path
    except Exception as e:
        metrics_md = f"### Error\n{str(e)}"
        return metrics_md, pd.DataFrame(), pd.DataFrame(), None, None
# Create the enhanced Gradio interface
def create_enhanced_dashboard():
    """Assemble the five-tab Gradio Blocks UI and return it (not launched).

    Tabs: overview KPIs, lead search/detail, utility market analysis,
    ML train+score, and a static data-dictionary reference. All event
    handlers delegate to the module-level dashboard/search/training
    functions defined above.

    NOTE(review): several label strings below contain mojibake (e.g. "π",
    "π₯") — presumably emoji lost in an encoding round-trip; confirm and
    restore the originals from version control. Preserved as-is here.
    """
    with gr.Blocks(title="SOLAI Enhanced Dashboard", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # π SOLAI Enhanced Dashboard
            ### Comprehensive Solar Lead Scoring & Analytics Platform
            This enhanced dashboard provides comprehensive analytics, lead management, and ML capabilities for Texas residential solar lead scoring.
            """
        )
        with gr.Tabs():
            # Tab 1: Overview Dashboard
            with gr.Tab("π Overview Dashboard"):
                gr.Markdown("## Key Performance Metrics & Analytics")
                refresh_btn = gr.Button("π Refresh Dashboard", variant="secondary")
                with gr.Row():
                    metrics_display = gr.HTML()
                with gr.Row():
                    with gr.Column():
                        geo_chart = gr.Plot(label="Geographic Distribution")
                    with gr.Column():
                        funnel_chart = gr.Plot(label="Conversion Funnel")
                with gr.Row():
                    score_dist_chart = gr.Plot(label="Lead Score Distribution")

                def refresh_overview():
                    # Thin wrapper so the callback has a local, readable name.
                    return create_overview_dashboard()

                refresh_btn.click(
                    refresh_overview,
                    outputs=[metrics_display, geo_chart, funnel_chart, score_dist_chart]
                )
                # Load initial data
                demo.load(
                    refresh_overview,
                    outputs=[metrics_display, geo_chart, funnel_chart, score_dist_chart]
                )
            # Tab 2: Lead Management
            with gr.Tab("π₯ Lead Management"):
                gr.Markdown("## Search, Filter & Manage Leads")
                with gr.Row():
                    search_input = gr.Textbox(label="Search Leads", placeholder="Enter lead ID or search term...")
                    tdsp_filter = gr.Dropdown(
                        choices=["All", "Oncor", "CenterPoint", "AEP_Texas", "TNMP", "Austin_Energy", "CPS_Energy", "Other_Muni"],
                        value="All",
                        label="Filter by TDSP"
                    )
                    sold_filter = gr.Dropdown(
                        choices=["All", "Sold", "Not Sold"],
                        value="All",
                        label="Filter by Status"
                    )
                search_btn = gr.Button("π Search Leads", variant="primary")
                with gr.Row():
                    leads_table = gr.Dataframe(
                        label="Lead Search Results",
                        interactive=False,
                        wrap=True
                    )
                gr.Markdown("## Lead Details")
                with gr.Row():
                    lead_id_input = gr.Textbox(label="Lead ID", placeholder="Enter lead ID for detailed view...")
                    get_details_btn = gr.Button("π Get Lead Details", variant="secondary")
                lead_details_display = gr.HTML()
                search_btn.click(
                    search_leads,
                    inputs=[search_input, tdsp_filter, sold_filter],
                    outputs=[leads_table]
                )
                get_details_btn.click(
                    get_lead_details,
                    inputs=[lead_id_input],
                    outputs=[lead_details_display]
                )
            # Tab 3: Utility & Market Intelligence
            with gr.Tab("β‘ Utility & Market Intelligence"):
                gr.Markdown("## Texas Utility Territory & Market Analysis")
                refresh_utility_btn = gr.Button("π Refresh Analysis", variant="secondary")
                with gr.Row():
                    utility_summary = gr.HTML()
                with gr.Row():
                    with gr.Column():
                        tdsp_analysis_chart = gr.Plot(label="TDSP Analysis")
                    with gr.Column():
                        rate_structure_chart = gr.Plot(label="Rate Structure Impact")
                with gr.Row():
                    solar_potential_chart = gr.Plot(label="Solar Potential Analysis")

                def refresh_utility():
                    # Thin wrapper so the callback has a local, readable name.
                    return create_utility_analysis()

                refresh_utility_btn.click(
                    refresh_utility,
                    outputs=[utility_summary, tdsp_analysis_chart, rate_structure_chart, solar_potential_chart]
                )
                # Load initial data
                demo.load(
                    refresh_utility,
                    outputs=[utility_summary, tdsp_analysis_chart, rate_structure_chart, solar_potential_chart]
                )
            # Tab 4: Enhanced ML Training & Scoring
            with gr.Tab("π€ ML Training & Scoring"):
                gr.Markdown("## Enhanced Machine Learning Pipeline")
                with gr.Row():
                    mode = gr.Radio(
                        choices=["Use example synthetic_v2", "Upload CSVs"],
                        value="Use example synthetic_v2",
                        label="Data Source",
                    )
                with gr.Row():
                    # Upload widgets start hidden; toggle_uploads reveals them
                    # when the user switches to "Upload CSVs" mode.
                    features_upload = gr.File(
                        label="Features CSV (for 'Upload CSVs' mode)",
                        file_types=[".csv"],
                        visible=False,
                    )
                    outcomes_upload = gr.File(
                        label="Outcomes CSV with columns [lead_id, sold] (for 'Upload CSVs' mode)",
                        file_types=[".csv"],
                        visible=False,
                    )

                def toggle_uploads(selected_mode: str):
                    # Show both upload widgets only in "Upload CSVs" mode.
                    show = selected_mode == "Upload CSVs"
                    return [
                        gr.update(visible=show),
                        gr.update(visible=show),
                    ]

                mode.change(
                    toggle_uploads,
                    inputs=[mode],
                    outputs=[features_upload, outcomes_upload],
                )
                with gr.Row():
                    run_btn = gr.Button("π Train + Score Model", variant="primary", size="lg")
                with gr.Row():
                    metrics_md = gr.Markdown()
                with gr.Row():
                    with gr.Column():
                        preds_df = gr.Dataframe(label="Predictions Preview", interactive=False)
                    with gr.Column():
                        scored_df = gr.Dataframe(label="Scored Features Preview", interactive=False)
                with gr.Row():
                    pred_file = gr.File(label="π₯ Download Predictions CSV")
                    scored_file = gr.File(label="π₯ Download Scored Features CSV")
                run_btn.click(
                    fn=train_and_score,
                    inputs=[mode, features_upload, outcomes_upload],
                    outputs=[metrics_md, preds_df, scored_df, pred_file, scored_file],
                )
            # Tab 5: Data Dictionary
            with gr.Tab("π Data Dictionary"):
                gr.Markdown("## SOLAI Data Dictionary & Field Descriptions")
                # Create data dictionary display
                # Static HTML built once at app-construction time from the
                # YAML dictionary loaded at import (DATA_DICT).
                dict_html = """
                <div style="padding: 20px;">
                    <h3>Field Groups & Descriptions</h3>
                """
                for group in DATA_DICT.get('field_groups', []):
                    dict_html += f"""
                    <div style="margin: 20px 0; padding: 15px; border: 1px solid #ddd; border-radius: 8px;">
                        <h4 style="color: #2196F3;">{group.get('name', 'Unknown Group')}</h4>
                        <p style="color: #666; margin-bottom: 15px;">{group.get('description', '')}</p>
                        <div style="display: grid; grid-template-columns: repeat(auto-fit, minmax(300px, 1fr)); gap: 10px;">
                    """
                    for field in group.get('fields', []):
                        field_name = field.get('field_name', '')
                        label = field.get('label', field_name)
                        description = field.get('description', '')
                        data_type = field.get('data_type', '')
                        is_pii = field.get('is_pii', False)
                        # Flag personally identifiable fields with a red badge.
                        pii_badge = '<span style="background: #ff4444; color: white; padding: 2px 6px; border-radius: 3px; font-size: 10px;">PII</span>' if is_pii else ''
                        dict_html += f"""
                        <div style="padding: 10px; background: #f9f9f9; border-radius: 5px;">
                            <strong>{label}</strong> {pii_badge}<br>
                            <code style="background: #e0e0e0; padding: 2px 4px; border-radius: 3px;">{field_name}</code>
                            <span style="color: #666;">({data_type})</span><br>
                            <small style="color: #666;">{description}</small>
                        </div>
                        """
                    dict_html += "</div></div>"
                dict_html += "</div>"
                gr.HTML(dict_html)
    return demo
if __name__ == "__main__":
    # Allow the hosting environment to override the port; default to 7861.
    server_port = int(os.environ.get('GRADIO_SERVER_PORT', 7861))
    app = create_enhanced_dashboard()
    app.launch(server_name="0.0.0.0", server_port=server_port, share=True)