| import streamlit as st |
| import requests |
| import pandas as pd |
| import io |
| import os |
| from PIL import Image |
| import time |
|
|
| |
# ---------------------------------------------------------------------------
# Page configuration.
# NOTE(review): the original page_icon literal was mojibake ("π€"); restored
# to the robot emoji the rest of the UI implies — confirm against the repo.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="PromptPrepML - Auto ML Data Preprocessing",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)


# Shared CSS classes referenced by the step screens rendered in main().
st.markdown("""
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f2937;
        text-align: center;
        margin-bottom: 2rem;
    }
    .step-header {
        font-size: 1.5rem;
        font-weight: 600;
        color: #374151;
        margin: 1rem 0;
    }
    .success-box {
        background-color: #f0fdf4;
        border: 1px solid #bbf7d0;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
    .info-box {
        background-color: #eff6ff;
        border: 1px solid #bfdbfe;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
    .warning-box {
        background-color: #fffbeb;
        border: 1px solid #fed7aa;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
</style>
""", unsafe_allow_html=True)


# Backend endpoints: the deployed instance is preferred; the local dev server
# is the fallback. See check_backend_health().
DEPLOYED_BACKEND = "https://promptprepml-backend.railway.app"
LOCAL_BACKEND = "http://localhost:8000"
|
|
def check_backend_health():
    """Probe the backends' /health endpoints, deployed URL first.

    On the first 200 response the winning URL is cached in
    ``st.session_state.backend_url`` for the request helpers below.

    Returns:
        ``(True, url)`` for the first reachable backend, else ``(False, None)``.
    """
    backends = [DEPLOYED_BACKEND, LOCAL_BACKEND]

    for backend_url in backends:
        try:
            response = requests.get(f"{backend_url}/health", timeout=5)
        # Narrowed from a bare ``except:`` — only swallow network-level
        # failures (DNS error, refused connection, timeout), not e.g.
        # KeyboardInterrupt or programming errors.
        except requests.RequestException:
            continue
        if response.status_code == 200:
            st.session_state.backend_url = backend_url
            return True, backend_url
    return False, None
|
|
def upload_dataset(uploaded_file):
    """POST the user's file to the backend's /upload-dataset endpoint.

    Returns:
        ``(json_payload, error_message)`` — exactly one of the two is ``None``.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    endpoint = f"{st.session_state.backend_url}/upload-dataset"
    try:
        response = requests.post(endpoint, files={'file': uploaded_file})
        if response.status_code != 200:
            return None, f"Upload failed: {response.text}"
        return response.json(), None
    except Exception as e:
        return None, f"Upload error: {str(e)}"
|
|
def process_pipeline(uploaded_file, prompt):
    """Send the dataset plus the user's natural-language prompt to the
    backend's /process-pipeline endpoint.

    Returns:
        ``(json_payload, error_message)`` — exactly one of the two is ``None``.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    endpoint = f"{st.session_state.backend_url}/process-pipeline"
    try:
        response = requests.post(
            endpoint,
            files={'file': uploaded_file},
            data={'prompt': prompt},
        )
        if response.status_code != 200:
            return None, f"Processing failed: {response.text}"
        return response.json(), None
    except Exception as e:
        return None, f"Processing error: {str(e)}"
|
|
def download_file(filename):
    """Fetch a processed artifact from the backend.

    Args:
        filename: name of the artifact to fetch (e.g. ``train.csv``).

    Returns:
        ``(content_bytes, error_message)`` — exactly one of the two is ``None``.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    try:
        # BUG FIX: the URL previously ended in a literal placeholder and
        # ignored ``filename`` entirely, so every download hit the same
        # nonexistent path. Interpolate the requested filename.
        response = requests.get(
            f"{st.session_state.backend_url}/api/download/{filename}"
        )
        if response.status_code == 200:
            return response.content, None
        else:
            return None, f"Download failed: {response.text}"
    except Exception as e:
        return None, f"Download error: {str(e)}"
|
|
def main():
    """Render the PromptPrepML app: a linear upload → configure → results
    workflow driven by ``st.session_state.step``.

    NOTE(review): several emoji literals in the original were
    mojibake-corrupted — four badly enough to split an f-string across two
    lines (a syntax error). They are restored to plausible equivalents here;
    confirm the exact glyphs against the repository history.
    """
    st.markdown('<h1 class="main-header">🤖 PromptPrepML</h1>', unsafe_allow_html=True)
    st.markdown('<p style="text-align: center; color: #6b7280; font-size: 1.1rem;">Convert natural language prompts into ML-ready datasets</p>', unsafe_allow_html=True)

    # Abort early when no backend (deployed or local) answers /health.
    backend_healthy, backend_url = check_backend_health()

    if not backend_healthy:
        st.error("❌ Backend is not running! Please start the backend:")
        st.code("""
cd promptprepml/backend
venv\\Scripts\\activate
python app/main.py

# OR wait for deployed backend to be ready
        """)
        st.info("🚀 **Deploying backend to cloud...** This will make the app work standalone!")
        return

    st.success(f"✅ Backend connected at: {backend_url}")

    # --- Sidebar: progress tracker --------------------------------------
    st.sidebar.title("📋 Processing Steps")

    # Initialise workflow state on the first run.
    if 'step' not in st.session_state:
        st.session_state.step = 'upload'
    if 'upload_result' not in st.session_state:
        st.session_state.upload_result = None
    if 'processing_result' not in st.session_state:
        st.session_state.processing_result = None

    steps = ['📤 Upload', '⚙️ Configure', '🚀 Process', '📊 Results']
    # Dict lookup replaces the original if/elif ladder; unknown states fall
    # back to step 0 (same as the original's default).
    step_order = {'upload': 0, 'configure': 1, 'process': 2, 'results': 3}
    current_step_index = step_order.get(st.session_state.step, 0)

    for i, step in enumerate(steps):
        if i < current_step_index:
            st.sidebar.success(f"✅ {step}")
        elif i == current_step_index:
            st.sidebar.info(f"🔄 {step}")
        else:
            st.sidebar.write(f"⏳ {step}")

    # --- Step 1: upload --------------------------------------------------
    if st.session_state.step == 'upload':
        st.markdown('<h2 class="step-header">📤 Step 1: Upload Dataset</h2>', unsafe_allow_html=True)

        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type=['csv'],
            help="Upload your dataset for preprocessing"
        )

        if uploaded_file is not None:
            st.info(f"📁 File uploaded: `{uploaded_file.name}`")

            try:
                df = pd.read_csv(uploaded_file)
                # BUG FIX: read_csv consumed the in-memory buffer; rewind it
                # so the same bytes can be re-sent to the backend below.
                uploaded_file.seek(0)
                st.markdown('<div class="info-box">', unsafe_allow_html=True)
                st.markdown(f"**Dataset Shape:** {df.shape}")
                st.markdown(f"**Columns:** {', '.join(df.columns)}")
                st.dataframe(df.head())
                st.markdown('</div>', unsafe_allow_html=True)

                if st.button("🚀 Continue to Configuration", type="primary"):
                    with st.spinner("Uploading dataset..."):
                        result, error = upload_dataset(uploaded_file)
                        if error:
                            st.error(f"❌ Upload failed: {error}")
                        else:
                            # BUG FIX: keep the file across reruns — the
                            # configure step needs it for /process-pipeline.
                            # (The original referenced an undefined local
                            # ``uploaded_file`` there: NameError.)
                            uploaded_file.seek(0)
                            st.session_state.uploaded_file = uploaded_file
                            st.session_state.upload_result = result
                            st.session_state.step = 'configure'
                            st.rerun()
            except Exception as e:
                st.error(f"❌ Error reading file: {str(e)}")

    # --- Step 2: configure ------------------------------------------------
    elif st.session_state.step == 'configure':
        st.markdown('<h2 class="step-header">⚙️ Step 2: Configure Processing</h2>', unsafe_allow_html=True)

        if st.session_state.upload_result:
            file_info = st.session_state.upload_result
            st.markdown('<div class="info-box">', unsafe_allow_html=True)
            st.markdown(f"**File:** {file_info.get('filename', 'Unknown')}")
            st.markdown(f"**Size:** {file_info.get('size', 'Unknown')} bytes")
            st.markdown('</div>', unsafe_allow_html=True)

        prompt = st.text_area(
            "Describe your preprocessing needs:",
            value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
            height=100,
            help="Describe what you want to do with your dataset in natural language"
        )

        col1, col2 = st.columns([1, 1])
        with col1:
            if st.button("⬅️ Back", type="secondary"):
                st.session_state.step = 'upload'
                st.rerun()

        with col2:
            if st.button("🚀 Start Processing", type="primary"):
                pending_file = st.session_state.get('uploaded_file')
                if pending_file is None:
                    st.error("❌ No uploaded file found — please go back and upload again.")
                else:
                    pending_file.seek(0)  # rewind in case a prior request read it
                    with st.spinner("Processing dataset... This may take a few minutes."):
                        result, error = process_pipeline(pending_file, prompt)
                        if error:
                            st.error(f"❌ Processing failed: {error}")
                        else:
                            st.session_state.processing_result = result
                            st.session_state.step = 'results'
                            st.rerun()

    # --- Step 3: results --------------------------------------------------
    elif st.session_state.step == 'results':
        st.markdown('<h2 class="step-header">📊 Step 3: Results</h2>', unsafe_allow_html=True)

        if st.session_state.processing_result:
            result = st.session_state.processing_result

            st.markdown('<div class="success-box">', unsafe_allow_html=True)
            st.success("✅ Dataset processed successfully!")
            st.markdown('</div>', unsafe_allow_html=True)

            col1, col2 = st.columns([2, 1])

            with col1:
                st.markdown("### 📋 Processing Summary")

                dataset_info = result.get('dataset_info', {})
                if dataset_info:
                    basic_info = dataset_info.get('basic_info', {})
                    st.markdown(f"- **Original Shape:** {basic_info.get('shape', 'Unknown')}")
                    st.markdown(f"- **Columns:** {basic_info.get('columns', 'Unknown')}")

                preprocessing_info = result.get('preprocessing_info', {})
                if preprocessing_info:
                    st.markdown(f"- **Processed Shape:** {preprocessing_info.get('processed_shape', 'Unknown')}")

                st.markdown("### 📊 Dataset Preview")
                preview_data = result.get('preview_data', [])
                if preview_data:
                    df_preview = pd.DataFrame(preview_data)
                    st.dataframe(df_preview)

            with col2:
                st.markdown("### 📥 Download Files")

                download_links = [
                    ("Processed Dataset", "processed_dataset.csv"),
                    ("Training Set", "train.csv"),
                    ("Test Set", "test.csv"),
                    ("Pipeline", "pipeline.pkl"),
                    ("EDA Report", "eda_report.html")
                ]

                for name, filename in download_links:
                    # BUG FIX: the key and labels previously contained a
                    # literal placeholder instead of {filename}, so all five
                    # buttons shared one key (Streamlit duplicate-key error).
                    if st.button(f"📥 {name}", key=f"download_{filename}"):
                        with st.spinner(f"Downloading {filename}..."):
                            file_content, error = download_file(filename)
                            if error:
                                st.error(f"❌ Download failed: {error}")
                            else:
                                st.download_button(
                                    label=f"💾 Save {filename}",
                                    data=file_content,
                                    file_name=filename,
                                    mime="application/octet-stream"
                                )

        col1, col2 = st.columns([1, 1])
        with col1:
            if st.button("🔄 Process New Dataset", type="secondary"):
                # Reset the whole workflow back to the upload step.
                for key in list(st.session_state.keys()):
                    del st.session_state[key]
                st.session_state.step = 'upload'
                st.rerun()

        with col2:
            if st.button("🔍 View EDA Report", type="primary"):
                st.info("📊 EDA Report feature coming soon!")

    # --- Footer -----------------------------------------------------------
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #6b7280; margin-top: 2rem;">
        <p><strong>PromptPrepML</strong> - Automated ML Data Preprocessing</p>
        <p><small>Convert natural language prompts into ML-ready datasets</small></p>
    </div>
    """, unsafe_allow_html=True)
|
|
| if __name__ == "__main__": |
| main() |
|
|