| import streamlit as st |
| from src.preprocessing.clean_data import cached_clean_csv |
| import pandas as pd |
| from functools import lru_cache |
|
|
| |
@lru_cache(maxsize=1)
def get_static_content():
    """Build and cache the welcome page's static HTML/Markdown fragments.

    The content never changes at runtime, so it is built once and memoized
    (maxsize=1 is enough for a zero-argument function).

    NOTE(review): the original file's emoji were mojibake ("β¨", "π", …);
    they have been restored to plausible originals — confirm against the
    app's design if exact glyphs matter.

    Returns:
        tuple: (welcome_header, features_header, feature_cards,
                getting_started, dataset_requirements, example_datasets)
        where ``feature_cards`` is a list of three HTML card snippets and
        the rest are single HTML/Markdown strings.
    """
    welcome_header = """
    <div class="welcome-header" style="text-align: left; margin-bottom: 2rem;">
        <h1>Experience AI like never before</h1>
        <p class="subtitle">
            Performance, Analysis, Insights Made Simple.
        </p>
    </div>
    """
    features_header = "## ✨ Key Features"
    feature_cards = [
        """
        <div class="feature-card">
            <h3>📊 Data Analysis</h3>
            <ul>
                <li>Automated data cleaning</li>
                <li>Interactive visualizations</li>
                <li>Statistical insights</li>
                <li>Correlation analysis</li>
            </ul>
        </div>
        """,
        """
        <div class="feature-card">
            <h3>🤖 Machine Learning</h3>
            <ul>
                <li>Multiple ML algorithms</li>
                <li>Automated model selection</li>
                <li>Hyperparameter tuning</li>
                <li>Performance metrics</li>
            </ul>
        </div>
        """,
        """
        <div class="feature-card">
            <h3>💡 AI Insights</h3>
            <ul>
                <li>Data quality checks</li>
                <li>Feature importance</li>
                <li>Model explanations</li>
                <li>Smart recommendations</li>
            </ul>
        </div>
        """,
    ]
    getting_started = """
    ## 🚀 Getting Started
    1. **Upload Your Dataset**: Use the sidebar to upload your CSV file
    2. **Explore Data**: View statistics and visualizations in the Overview tab
    3. **Train Models**: Select algorithms and tune parameters
    4. **Get Insights**: Receive AI-powered recommendations
    """
    dataset_requirements = """
    * File format: CSV
    * Maximum size: 200MB
    * Supported column types:
        * Numeric (int, float)
        * Categorical (string, boolean)
        * Temporal (date, datetime)
    * Clean data preferred, but not required
    """
    example_datasets = """
    Try these example datasets to explore the app:
    * [Iris Dataset](https://archive.ics.uci.edu/ml/datasets/iris)
    * [Boston Housing](https://www.kaggle.com/c/boston-housing)
    * [Wine Quality](https://archive.ics.uci.edu/ml/datasets/wine+quality)
    """
    return (welcome_header, features_header, feature_cards,
            getting_started, dataset_requirements, example_datasets)
|
|
def show_welcome_page():
    """Render the welcome/landing page and the CSV upload workflow.

    Displays cached static content (header, feature cards, instructions),
    then handles an optional CSV upload: validates size and parseability,
    runs (or skips) the cleaning pipeline, and publishes the resulting
    DataFrame plus related flags into ``st.session_state``.

    Side effects:
        Writes ``df``, ``insights``, ``data_cleaned``, ``dataset_loaded``,
        ``is_user_uploaded``, ``original_df_json``, ``skip_cleaning`` to
        session state and invalidates dataset-derived cache keys.

    NOTE(review): mojibake emoji in user-facing strings ("β", "π§", …)
    were restored to plausible originals (❌/✅/🧠/…) — confirm glyph
    choices. Three success messages were also broken across physical
    lines in the original (a SyntaxError); they are rejoined here.
    """
    (welcome_header, features_header, feature_cards,
     getting_started, dataset_requirements, example_datasets) = get_static_content()

    st.markdown(welcome_header, unsafe_allow_html=True)
    st.markdown(features_header, unsafe_allow_html=True)

    # Render the three feature cards side by side.
    for column, card in zip(st.columns(3, gap="medium"), feature_cards):
        with column:
            st.markdown(card, unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)
    st.markdown(getting_started, unsafe_allow_html=True)

    with st.expander("📋 Dataset Requirements"):
        st.markdown(dataset_requirements)

    with st.expander("🎯 Example Datasets"):
        st.markdown(example_datasets)

    st.markdown("### 📤 Upload Your Dataset (Currently Using Default Dataset)")

    skip_cleaning = st.checkbox("My dataset is already cleaned (skip cleaning)")
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        # Enforce the documented 200MB limit before attempting to parse.
        if uploaded_file.size > 200 * 1024 * 1024:
            st.error("❌ File size exceeds 200MB limit. Please upload a smaller file.")
            return

        try:
            df = pd.read_csv(uploaded_file)
            if df.empty:
                st.error("❌ The uploaded file is empty. Please upload a file with data.")
                return
            st.success("✅ Dataset uploaded successfully!")
        except pd.errors.EmptyDataError:
            st.error("❌ The uploaded file is empty. Please upload a file with data.")
            return
        except pd.errors.ParserError:
            st.error("❌ Unable to parse the CSV file. Please ensure it's properly formatted.")
            return

        # Serialize to JSON so the cleaning call gets a hashable, cacheable argument.
        df_json = df.to_json(orient='records')

        spinner_message = ("Processing dataset..." if skip_cleaning
                           else "🧠 AI is analyzing and cleaning the data...")
        with st.spinner(spinner_message):
            try:
                cleaned_df, insights = cached_clean_csv(df_json, skip_cleaning)
            except Exception as cleaning_error:
                # Best-effort: fall back to the raw upload rather than failing the page.
                st.error(f"❌ Error during data cleaning: {str(cleaning_error)}")
                st.warning("⚠️ Using original dataset without cleaning due to errors.")
                cleaned_df = df
                insights = "Cleaning failed, using original data."

        # Publish the (possibly cleaned) dataset and flags to session state.
        st.session_state.df = cleaned_df
        st.session_state.insights = insights
        st.session_state.data_cleaned = True
        st.session_state.dataset_loaded = True
        st.session_state.is_user_uploaded = True
        st.session_state.original_df_json = df_json
        st.session_state.skip_cleaning = skip_cleaning

        # Invalidate caches derived from the previous dataset.
        for stale_key in ("column_types", "corr_matrix", "df_hash"):
            if stale_key in st.session_state:
                del st.session_state[stale_key]
        if "test_results_calculated" in st.session_state:
            st.session_state.test_results_calculated = False

        if skip_cleaning:
            st.success("✅ Using uploaded dataset as-is (skipped cleaning).")
        else:
            st.success("✅ Data cleaned successfully!")

    except Exception as e:
        # Last-resort guard so an unexpected failure shows a friendly message
        # instead of a traceback in the UI.
        st.error(f"❌ Error processing dataset: {str(e)}")
        st.info("ℹ️ Please check that your file is a valid CSV and try again.")