| import streamlit as st |
| import requests |
| import pandas as pd |
| import io |
| import os |
| from PIL import Image |
| import time |
|
|
| |
# ---------------------------------------------------------------------------
# Page configuration.
# NOTE(review): the original page_icon literal was mojibake ("π€"); restored
# to the robot emoji the rest of the UI implies — confirm against the repo.
# ---------------------------------------------------------------------------
st.set_page_config(
    page_title="PromptPrepML - Auto ML Data Preprocessing",
    page_icon="🤖",
    layout="wide",
    initial_sidebar_state="expanded"
)


# Shared CSS classes referenced by the step screens rendered in main().
st.markdown("""
<style>
    .main-header {
        font-size: 2.5rem;
        font-weight: bold;
        color: #1f2937;
        text-align: center;
        margin-bottom: 2rem;
    }
    .step-header {
        font-size: 1.5rem;
        font-weight: 600;
        color: #374151;
        margin: 1rem 0;
    }
    .success-box {
        background-color: #f0fdf4;
        border: 1px solid #bbf7d0;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
    .info-box {
        background-color: #eff6ff;
        border: 1px solid #bfdbfe;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
    .warning-box {
        background-color: #fffbeb;
        border: 1px solid #fed7aa;
        border-radius: 0.5rem;
        padding: 1rem;
        margin: 1rem 0;
    }
</style>
""", unsafe_allow_html=True)


# Backend endpoints: the deployed instance is preferred; the local dev server
# is the fallback. See check_backend_health().
DEPLOYED_BACKEND = "https://promptprepml-backend.railway.app"
LOCAL_BACKEND = "http://localhost:8000"
|
|
def check_backend_health():
    """Probe the backends' /health endpoints, deployed URL first.

    On the first 200 response the winning URL is cached in
    ``st.session_state.backend_url`` for the request helpers below.

    Returns:
        ``(True, url)`` for the first reachable backend, else ``(False, None)``.
    """
    backends = [DEPLOYED_BACKEND, LOCAL_BACKEND]

    for backend_url in backends:
        try:
            response = requests.get(f"{backend_url}/health", timeout=5)
        # Narrowed from a bare ``except:`` — only swallow network-level
        # failures (DNS error, refused connection, timeout), not e.g.
        # KeyboardInterrupt or programming errors.
        except requests.RequestException:
            continue
        if response.status_code == 200:
            st.session_state.backend_url = backend_url
            return True, backend_url
    return False, None
|
|
def upload_dataset(uploaded_file):
    """POST the user's file to the backend's /upload-dataset endpoint.

    Returns:
        ``(json_payload, error_message)`` — exactly one of the two is ``None``.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    endpoint = f"{st.session_state.backend_url}/upload-dataset"
    try:
        response = requests.post(endpoint, files={'file': uploaded_file})
        if response.status_code != 200:
            return None, f"Upload failed: {response.text}"
        return response.json(), None
    except Exception as e:
        return None, f"Upload error: {str(e)}"
|
|
def process_pipeline(uploaded_file, prompt):
    """Send the dataset plus the user's natural-language prompt to the
    backend's /process-pipeline endpoint.

    Returns:
        ``(json_payload, error_message)`` — exactly one of the two is ``None``.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    endpoint = f"{st.session_state.backend_url}/process-pipeline"
    try:
        response = requests.post(
            endpoint,
            files={'file': uploaded_file},
            data={'prompt': prompt},
        )
        if response.status_code != 200:
            return None, f"Processing failed: {response.text}"
        return response.json(), None
    except Exception as e:
        return None, f"Processing error: {str(e)}"
|
|
def download_file(filename):
    """Fetch a processed artifact from the backend.

    Args:
        filename: name of the artifact to fetch (e.g. ``train.csv``).

    Returns:
        ``(content_bytes, error_message)`` — exactly one of the two is ``None``.
    """
    if 'backend_url' not in st.session_state:
        return None, "Backend not connected"

    try:
        # BUG FIX: the URL previously ended in a literal placeholder and
        # ignored ``filename`` entirely, so every download hit the same
        # nonexistent path. Interpolate the requested filename.
        response = requests.get(
            f"{st.session_state.backend_url}/api/download/{filename}"
        )
        if response.status_code == 200:
            return response.content, None
        else:
            return None, f"Download failed: {response.text}"
    except Exception as e:
        return None, f"Download error: {str(e)}"
|
|
def main():
    """Render the PromptPrepML app: a linear upload → configure → results
    workflow driven by ``st.session_state.step``.

    NOTE(review): several emoji literals in the original were
    mojibake-corrupted — four badly enough to split an f-string across two
    lines (a syntax error). They are restored to plausible equivalents here;
    confirm the exact glyphs against the repository history.
    """
    st.markdown('<h1 class="main-header">🤖 PromptPrepML</h1>', unsafe_allow_html=True)
    st.markdown('<p style="text-align: center; color: #6b7280; font-size: 1.1rem;">Convert natural language prompts into ML-ready datasets</p>', unsafe_allow_html=True)

    # Abort early when no backend (deployed or local) answers /health.
    backend_healthy, backend_url = check_backend_health()

    if not backend_healthy:
        st.error("❌ Backend is not running! Please start the backend:")
        st.code("""
cd promptprepml/backend
venv\\Scripts\\activate
python app/main.py

# OR wait for deployed backend to be ready
        """)
        st.info("🚀 **Deploying backend to cloud...** This will make the app work standalone!")
        return

    st.success(f"✅ Backend connected at: {backend_url}")

    # --- Sidebar: progress tracker --------------------------------------
    st.sidebar.title("📋 Processing Steps")

    # Initialise workflow state on the first run.
    if 'step' not in st.session_state:
        st.session_state.step = 'upload'
    if 'upload_result' not in st.session_state:
        st.session_state.upload_result = None
    if 'processing_result' not in st.session_state:
        st.session_state.processing_result = None

    steps = ['📤 Upload', '⚙️ Configure', '🚀 Process', '📊 Results']
    # Dict lookup replaces the original if/elif ladder; unknown states fall
    # back to step 0 (same as the original's default).
    step_order = {'upload': 0, 'configure': 1, 'process': 2, 'results': 3}
    current_step_index = step_order.get(st.session_state.step, 0)

    for i, step in enumerate(steps):
        if i < current_step_index:
            st.sidebar.success(f"✅ {step}")
        elif i == current_step_index:
            st.sidebar.info(f"🔄 {step}")
        else:
            st.sidebar.write(f"⏳ {step}")

    # --- Step 1: upload --------------------------------------------------
    if st.session_state.step == 'upload':
        st.markdown('<h2 class="step-header">📤 Step 1: Upload Dataset</h2>', unsafe_allow_html=True)

        uploaded_file = st.file_uploader(
            "Choose a CSV file",
            type=['csv'],
            help="Upload your dataset for preprocessing"
        )

        if uploaded_file is not None:
            st.info(f"📁 File uploaded: `{uploaded_file.name}`")

            try:
                df = pd.read_csv(uploaded_file)
                # BUG FIX: read_csv consumed the in-memory buffer; rewind it
                # so the same bytes can be re-sent to the backend below.
                uploaded_file.seek(0)
                st.markdown('<div class="info-box">', unsafe_allow_html=True)
                st.markdown(f"**Dataset Shape:** {df.shape}")
                st.markdown(f"**Columns:** {', '.join(df.columns)}")
                st.dataframe(df.head())
                st.markdown('</div>', unsafe_allow_html=True)

                if st.button("🚀 Continue to Configuration", type="primary"):
                    with st.spinner("Uploading dataset..."):
                        result, error = upload_dataset(uploaded_file)
                        if error:
                            st.error(f"❌ Upload failed: {error}")
                        else:
                            # BUG FIX: keep the file across reruns — the
                            # configure step needs it for /process-pipeline.
                            # (The original referenced an undefined local
                            # ``uploaded_file`` there: NameError.)
                            uploaded_file.seek(0)
                            st.session_state.uploaded_file = uploaded_file
                            st.session_state.upload_result = result
                            st.session_state.step = 'configure'
                            st.rerun()
            except Exception as e:
                st.error(f"❌ Error reading file: {str(e)}")

    # --- Step 2: configure ------------------------------------------------
    elif st.session_state.step == 'configure':
        st.markdown('<h2 class="step-header">⚙️ Step 2: Configure Processing</h2>', unsafe_allow_html=True)

        if st.session_state.upload_result:
            file_info = st.session_state.upload_result
            st.markdown('<div class="info-box">', unsafe_allow_html=True)
            st.markdown(f"**File:** {file_info.get('filename', 'Unknown')}")
            st.markdown(f"**Size:** {file_info.get('size', 'Unknown')} bytes")
            st.markdown('</div>', unsafe_allow_html=True)

        prompt = st.text_area(
            "Describe your preprocessing needs:",
            value="Prepare this dataset for machine learning. Handle missing values, remove identifier columns, extract date features, encode categorical variables, and scale numeric features.",
            height=100,
            help="Describe what you want to do with your dataset in natural language"
        )

        col1, col2 = st.columns([1, 1])
        with col1:
            if st.button("⬅️ Back", type="secondary"):
                st.session_state.step = 'upload'
                st.rerun()

        with col2:
            if st.button("🚀 Start Processing", type="primary"):
                pending_file = st.session_state.get('uploaded_file')
                if pending_file is None:
                    st.error("❌ No uploaded file found — please go back and upload again.")
                else:
                    pending_file.seek(0)  # rewind in case a prior request read it
                    with st.spinner("Processing dataset... This may take a few minutes."):
                        result, error = process_pipeline(pending_file, prompt)
                        if error:
                            st.error(f"❌ Processing failed: {error}")
                        else:
                            st.session_state.processing_result = result
                            st.session_state.step = 'results'
                            st.rerun()

    # --- Step 3: results --------------------------------------------------
    elif st.session_state.step == 'results':
        st.markdown('<h2 class="step-header">📊 Step 3: Results</h2>', unsafe_allow_html=True)

        if st.session_state.processing_result:
            result = st.session_state.processing_result

            st.markdown('<div class="success-box">', unsafe_allow_html=True)
            st.success("✅ Dataset processed successfully!")
            st.markdown('</div>', unsafe_allow_html=True)

            col1, col2 = st.columns([2, 1])

            with col1:
                st.markdown("### 📋 Processing Summary")

                dataset_info = result.get('dataset_info', {})
                if dataset_info:
                    basic_info = dataset_info.get('basic_info', {})
                    st.markdown(f"- **Original Shape:** {basic_info.get('shape', 'Unknown')}")
                    st.markdown(f"- **Columns:** {basic_info.get('columns', 'Unknown')}")

                preprocessing_info = result.get('preprocessing_info', {})
                if preprocessing_info:
                    st.markdown(f"- **Processed Shape:** {preprocessing_info.get('processed_shape', 'Unknown')}")

                st.markdown("### 📊 Dataset Preview")
                preview_data = result.get('preview_data', [])
                if preview_data:
                    df_preview = pd.DataFrame(preview_data)
                    st.dataframe(df_preview)

            with col2:
                st.markdown("### 📥 Download Files")

                download_links = [
                    ("Processed Dataset", "processed_dataset.csv"),
                    ("Training Set", "train.csv"),
                    ("Test Set", "test.csv"),
                    ("Pipeline", "pipeline.pkl"),
                    ("EDA Report", "eda_report.html")
                ]

                for name, filename in download_links:
                    # BUG FIX: the key and labels previously contained a
                    # literal placeholder instead of {filename}, so all five
                    # buttons shared one key (Streamlit duplicate-key error).
                    if st.button(f"📥 {name}", key=f"download_{filename}"):
                        with st.spinner(f"Downloading {filename}..."):
                            file_content, error = download_file(filename)
                            if error:
                                st.error(f"❌ Download failed: {error}")
                            else:
                                st.download_button(
                                    label=f"💾 Save {filename}",
                                    data=file_content,
                                    file_name=filename,
                                    mime="application/octet-stream"
                                )

        col1, col2 = st.columns([1, 1])
        with col1:
            if st.button("🔄 Process New Dataset", type="secondary"):
                # Reset the whole workflow back to the upload step.
                for key in list(st.session_state.keys()):
                    del st.session_state[key]
                st.session_state.step = 'upload'
                st.rerun()

        with col2:
            if st.button("🔍 View EDA Report", type="primary"):
                st.info("📊 EDA Report feature coming soon!")

    # --- Footer -----------------------------------------------------------
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #6b7280; margin-top: 2rem;">
        <p><strong>PromptPrepML</strong> - Automated ML Data Preprocessing</p>
        <p><small>Convert natural language prompts into ML-ready datasets</small></p>
    </div>
    """, unsafe_allow_html=True)
|
|
| if __name__ == "__main__": |
| main() |
|
|