import io
from pathlib import Path

import gradio as gr
import pandas as pd
import plotly.express as px
# Download the default Gapminder table once, at import time. Starting the app
# therefore needs network access.
default_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')

# Module-level registry of every dataset loaded so far, keyed by display name.
# It starts out holding only the default table.
datasets = {'Gapminder': default_df}
def load_builtin_dataset(dataset_name):
    """Load one of the bundled sample datasets and register it in ``datasets``.

    Args:
        dataset_name: One of "Gapminder", "Iris", "Tips", "Stock Data" or
            "Wind Data".

    Returns:
        A ``(dataframe, status_message)`` pair. When ``dataset_name`` is not
        a known dataset, or loading raises, the dataframe is ``None`` and the
        message describes the problem.
    """

    def _load_stocks():
        # Reshape the wide table (one column per company) into long form so a
        # single value column can be plotted and grouped by company.
        long_df = px.data.stocks().melt(
            id_vars='date', var_name='company', value_name='stock_price'
        )
        long_df['date'] = pd.to_datetime(long_df['date'])
        return long_df

    # Every entry is a zero-argument callable so nothing is fetched until the
    # requested dataset is known.
    loaders = {
        "Gapminder": lambda: pd.read_csv(
            'https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'
        ),
        "Iris": lambda: px.data.iris(),
        "Tips": lambda: px.data.tips(),
        "Stock Data": _load_stocks,
        "Wind Data": lambda: px.data.wind(),
    }

    loader = loaders.get(dataset_name)
    if loader is None:
        # Always hand back two values: the click handler is wired to two
        # output components.
        return None, f"❌ Error loading {dataset_name}: unknown dataset"

    try:
        df = loader()
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return None, f"❌ Error loading {dataset_name}: {str(e)}"

    datasets[dataset_name] = df
    return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
def upload_dataset(file, custom_name):
    """Read an uploaded CSV/Excel file and register it in ``datasets``.

    Args:
        file: The uploaded file. The path is read from its ``.name``
            attribute when present; otherwise the value itself is treated as
            a path. ``None`` means nothing was uploaded.
        custom_name: Optional display name for the dataset. When blank, the
            part of the file name before its first dot is used.

    Returns:
        A ``(dataframe, status_message, selector_update)`` triple. On success
        the selector update lists every registered dataset and selects the
        new one; on failure the dataframe is ``None``.
    """
    if file is None:
        return None, "Please upload a file", gr.update(choices=list(datasets.keys()))

    try:
        path = Path(getattr(file, "name", file))
        # Compare case-insensitively so "DATA.CSV" / "Book.XLSX" are accepted.
        suffix = path.suffix.lower()

        if suffix == '.csv':
            df = pd.read_csv(path)
        elif suffix in ('.xlsx', '.xls'):
            df = pd.read_excel(path)
        else:
            return None, "❌ Unsupported file format. Please upload CSV or Excel.", gr.update()

        # A whitespace-only name counts as "not provided".
        requested_name = (custom_name or "").strip()
        dataset_name = requested_name or path.name.split('.')[0]
        datasets[dataset_name] = df

        return (
            df,
            f"✅ Uploaded {dataset_name}: {len(df)} rows, {len(df.columns)} columns",
            gr.update(choices=list(datasets.keys()), value=dataset_name),
        )

    except Exception as e:  # UI boundary: report the failure instead of crashing
        return None, f"❌ Error reading file: {str(e)}", gr.update()
def switch_dataset(dataset_name):
    """Look up ``dataset_name`` in ``datasets`` and describe it for the UI.

    Args:
        dataset_name: Key of a dataset previously stored in ``datasets``.

    Returns:
        A 7-tuple: the first 10 rows, a Markdown summary, a dropdown update
        offering all columns, one offering the numeric columns, one offering
        "" plus the categorical columns, one offering "" plus the numeric
        columns, and the full dataframe. For an unknown name the dataframes
        are ``None``, the summary is "Dataset not found" and the dropdown
        updates are empty.
    """
    if dataset_name not in datasets:
        return None, "Dataset not found", gr.update(), gr.update(), gr.update(), gr.update(), None

    df = datasets[dataset_name]

    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    all_cols = df.columns.tolist()

    def _preview(cols, limit=5):
        # Column labels are not guaranteed to be strings (e.g. integer
        # headers), so convert before joining.
        shown = ', '.join(str(col) for col in cols[:limit])
        return shown + ('...' if len(cols) > limit else '')

    info = "\n".join([
        "",
        f"### Dataset: {dataset_name}",
        f"- **Rows**: {len(df)}",
        f"- **Columns**: {len(df.columns)}",
        f"- **Numeric columns**: {_preview(numeric_cols)}",
        f"- **Categorical columns**: {_preview(categorical_cols)}",
        "",
    ])

    return (
        df.head(10),
        info,
        gr.update(choices=all_cols, value=all_cols[0] if all_cols else None),
        gr.update(choices=numeric_cols, value=numeric_cols[0] if numeric_cols else None),
        # "" is the "nothing selected" entry of the two optional dropdowns.
        gr.update(choices=[""] + categorical_cols, value=""),
        gr.update(choices=[""] + numeric_cols, value=""),
        df,
    )
def create_plot(df, plot_type, x_col, y_col, color_col, size_col):
    """Build a Plotly figure for the current dataset and widget selections.

    Args:
        df: Dataframe to plot, or ``None`` when no dataset is active.
        plot_type: "Scatter", "Line", "Bar", "Histogram", "Box" or "Heatmap".
        x_col: Column for the x axis; ``None`` means no plot is produced.
        y_col: Column for the y axis (ignored by Histogram and Heatmap).
        color_col: Optional grouping column; "" means "not selected".
        size_col: Optional marker-size column (Scatter only); "" means
            "not selected".

    Returns:
        The figure with its height set to 500, or ``None`` when there is
        nothing to plot, the plot type is unsupported, the heatmap has fewer
        than two numeric columns, or Plotly raises.
    """
    if df is None or x_col is None:
        return None

    # Compare against "" explicitly: a falsy-but-valid column label such as 0
    # must not be discarded.
    color_col = None if color_col == "" else color_col
    size_col = None if size_col == "" else size_col

    try:
        if plot_type == "Scatter":
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=size_col,
                             title=f"Scatter: {x_col} vs {y_col}")
        elif plot_type == "Line":
            fig = px.line(df, x=x_col, y=y_col, color=color_col,
                          title=f"Line: {x_col} vs {y_col}")
        elif plot_type == "Bar":
            # color=None is the same as omitting the argument, so one call
            # covers both the coloured and the plain case.
            fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                         title=f"Bar: {x_col} vs {y_col}")
        elif plot_type == "Histogram":
            fig = px.histogram(df, x=x_col, color=color_col,
                               title=f"Histogram of {x_col}")
        elif plot_type == "Box":
            fig = px.box(df, x=x_col, y=y_col, color=color_col,
                         title=f"Box plot: {x_col} vs {y_col}")
        elif plot_type == "Heatmap":
            # The heatmap ignores the axis selections and shows the
            # correlation matrix of all numeric columns.
            numeric_df = df.select_dtypes(include=['number'])
            if len(numeric_df.columns) <= 1:
                return None
            fig = px.imshow(numeric_df.corr(), text_auto=True, title="Correlation Heatmap")
        else:
            print(f"Plot error: unsupported plot type {plot_type!r}")
            return None

        fig.update_layout(height=500)
        return fig

    except Exception as e:  # UI boundary: a bad column combination must not crash the app
        print(f"Plot error: {e}")
        return None
# Page layout: three tabs (dataset management, visualization, data analysis).
# NOTE(review): the emoji in the heading and tab labels were mis-encoded in
# the source and could not be recovered exactly; the ones used here were
# chosen to fit each label — confirm against the original.
with gr.Blocks(title="Dynamic Dataset Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "\n"
        "# 📊 Dynamic Dataset Explorer\n"
        "Upload your own data or explore built-in datasets with automatic visualization\n"
    )

    # Holds the active dataframe; it starts as the default dataset.
    current_df = gr.State(value=default_df)

    with gr.Tabs():

        with gr.TabItem("📁 Dataset Management"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Load Built-in Dataset")
                    builtin_choice = gr.Dropdown(
                        choices=["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"],
                        value="Gapminder",
                        label="Select Dataset",
                    )
                    load_builtin_btn = gr.Button("Load Dataset", variant="primary")

                    gr.Markdown("### Upload Custom Dataset")
                    file_upload = gr.File(label="Upload CSV or Excel", file_types=[".csv", ".xlsx", ".xls"])
                    custom_name = gr.Textbox(label="Dataset Name (optional)", placeholder="My Dataset")
                    upload_btn = gr.Button("Upload", variant="primary")

                    gr.Markdown("### Active Datasets")
                    dataset_selector = gr.Dropdown(
                        choices=list(datasets.keys()),
                        value="Gapminder",
                        label="Switch Dataset",
                    )

                with gr.Column(scale=2):
                    status_msg = gr.Markdown("Ready to load data")
                    data_info = gr.Markdown()
                    data_preview = gr.Dataframe(label="Data Preview (first 10 rows)")

        with gr.TabItem("📈 Visualization"):
            with gr.Row():
                with gr.Column(scale=1):
                    plot_type = gr.Radio(
                        choices=["Scatter", "Line", "Bar", "Histogram", "Box", "Heatmap"],
                        value="Scatter",
                        label="Plot Type",
                    )

                    # The column dropdowns are created with no choices.
                    x_axis = gr.Dropdown(label="X Axis", choices=[], interactive=True)
                    y_axis = gr.Dropdown(label="Y Axis", choices=[], interactive=True)
                    color_by = gr.Dropdown(label="Color By (optional)", choices=[], interactive=True)
                    size_by = gr.Dropdown(label="Size By (optional)", choices=[], interactive=True)

                    plot_btn = gr.Button("Create Plot", variant="primary")

                with gr.Column(scale=2):
                    plot_output = gr.Plot(label="Visualization")

        with gr.TabItem("🔍 Data Analysis"):
            with gr.Row():
                with gr.Column():
                    analysis_type = gr.Radio(
                        choices=["Summary Statistics", "Missing Values", "Data Types", "Unique Values"],
                        value="Summary Statistics",
                        label="Analysis Type",
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")

                with gr.Column():
                    analysis_output = gr.Markdown()
def analyze_data(df, analysis_type):
    """Render a text report about ``df`` as a Markdown string.

    Args:
        df: Dataframe to inspect, or ``None`` when no dataset is loaded.
        analysis_type: "Summary Statistics", "Missing Values", "Data Types"
            or "Unique Values".

    Returns:
        The report wrapped in a fenced code block, or a plain message when
        there is no dataset, nothing to report, or ``analysis_type`` is not
        recognised.
    """
    if df is None:
        return "No dataset loaded"

    if analysis_type == "Summary Statistics":
        if len(df.columns) == 0:
            # describe() raises ValueError on a frame without columns.
            return "Dataset has no columns to summarize"
        report = df.describe()
    elif analysis_type == "Missing Values":
        missing = df.isnull().sum()
        if not missing.any():
            return "No missing values!"
        # Only list the columns that actually contain gaps.
        report = missing[missing > 0]
    elif analysis_type == "Data Types":
        report = df.dtypes
    elif analysis_type == "Unique Values":
        report = df.nunique()
    else:
        return f"Unknown analysis type: {analysis_type}"

    return f"```\n{report.to_string()}\n```"
def _refresh_dataset_selector(loaded_name):
    """Return a dropdown update listing every dataset in ``datasets``.

    When ``loaded_name`` is registered it is also selected, so the
    selector's ``change`` listener switches the active dataset to it.
    Otherwise only the choices are refreshed.
    """
    choices = list(datasets.keys())
    if loaded_name in datasets:
        return gr.update(choices=choices, value=loaded_name)
    return gr.update(choices=choices)


# Event listeners have to be attached inside the Blocks context, so re-enter it.
with demo:
    # Components refreshed whenever the active dataset changes.
    switch_outputs = [data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    # Everything create_plot needs, in its parameter order.
    plot_inputs = [current_df, plot_type, x_axis, y_axis, color_by, size_by]

    # Load a built-in dataset, then list it in the selector and select it.
    load_builtin_btn.click(
        load_builtin_dataset,
        inputs=[builtin_choice],
        outputs=[data_preview, status_msg],
    ).then(
        _refresh_dataset_selector,
        inputs=[builtin_choice],
        outputs=[dataset_selector],
    )

    upload_btn.click(
        upload_dataset,
        inputs=[file_upload, custom_name],
        outputs=[data_preview, status_msg, dataset_selector],
    )

    dataset_selector.change(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=switch_outputs,
    )

    plot_btn.click(create_plot, inputs=plot_inputs, outputs=[plot_output])

    # Redraw automatically whenever any plot setting changes.
    for component in (plot_type, x_axis, y_axis, color_by, size_by):
        component.change(create_plot, inputs=plot_inputs, outputs=[plot_output])

    analyze_btn.click(
        analyze_data,
        inputs=[current_df, analysis_type],
        outputs=[analysis_output],
    )

    # Populate the preview, summary and column dropdowns on page load.
    demo.load(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=switch_outputs,
    )
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    launch_options = {"share": False, "debug": True}
    demo.launch(**launch_options)