dash / app.py
mic3333's picture
Update app.py
ac0793a verified
import gradio as gr
import plotly.express as px
import pandas as pd
import io
# In-memory registry of loaded datasets, keyed by display name (acts as our "database").
# The default Gapminder table is fetched once at import time and registered up front.
default_df = pd.read_csv(
    'https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'
)
datasets = {'Gapminder': default_df}
# Function to load different built-in datasets
def load_builtin_dataset(dataset_name):
    """Load one of the built-in sample datasets and register it.

    Args:
        dataset_name: Display name of the dataset. Supported values are
            "Gapminder", "Iris", "Tips", "Stock Data" and "Wind Data".

    Returns:
        A ``(dataframe, status_message)`` tuple. On success the DataFrame is
        also stored in the module-level ``datasets`` registry under
        ``dataset_name``. On failure (unknown name or loader error) the first
        element is ``None`` and the message describes the problem, so the
        caller always receives exactly two values.
    """
    def _load_stocks():
        # Reshape from wide to long format for better analysis
        long_df = px.data.stocks().melt(
            id_vars='date', var_name='company', value_name='stock_price'
        )
        long_df['date'] = pd.to_datetime(long_df['date'])
        return long_df

    # Loaders are wrapped in callables so nothing is fetched until selected.
    loaders = {
        "Gapminder": lambda: pd.read_csv(
            'https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'
        ),
        "Iris": lambda: px.data.iris(),
        "Tips": lambda: px.data.tips(),
        "Stock Data": _load_stocks,
        "Wind Data": lambda: px.data.wind(),
    }

    loader = loaders.get(dataset_name)
    if loader is None:
        # Previously this case fell through and returned a bare None.
        return None, f"❌ Unknown dataset: {dataset_name}"

    try:
        df = loader()
    except Exception as e:  # UI boundary: report the failure instead of crashing
        return None, f"❌ Error loading {dataset_name}: {str(e)}"

    datasets[dataset_name] = df
    return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
# Function to handle file uploads
def upload_dataset(file, custom_name):
    """Read an uploaded CSV/Excel file and register it as a dataset.

    Args:
        file: The upload handed over by the file component. Either an object
            exposing the on-disk path as ``.name`` or a plain path string is
            accepted (which one arrives presumably depends on the Gradio
            version/configuration).
        custom_name: Optional name to register the dataset under. When empty
            or whitespace-only, the file name without its final extension is
            used.

    Returns:
        A ``(dataframe, status_message, dropdown_update)`` tuple. On success
        the DataFrame is stored in the module-level ``datasets`` registry and
        the dropdown update lists all datasets with the new one selected. On
        failure the DataFrame is ``None`` and the message explains why.
    """
    from pathlib import Path  # local import keeps this block self-contained

    if file is None:
        return None, "Please upload a file", gr.update(choices=list(datasets.keys()))

    try:
        path = Path(getattr(file, "name", file))
        # Compare the extension case-insensitively so "DATA.CSV" is accepted.
        suffix = path.suffix.lower()
        if suffix == '.csv':
            df = pd.read_csv(path)
        elif suffix in ('.xlsx', '.xls'):
            df = pd.read_excel(path)
        else:
            return None, "❌ Unsupported file format. Please upload CSV or Excel.", gr.update()

        # Use the full stem so "sales.2022.csv" and "sales.2023.csv" do not
        # collapse to the same registry key.
        dataset_name = (custom_name or "").strip() or path.stem
        datasets[dataset_name] = df
        return (
            df,
            f"✅ Uploaded {dataset_name}: {len(df)} rows, {len(df.columns)} columns",
            gr.update(choices=list(datasets.keys()), value=dataset_name),
        )
    except Exception as e:  # UI boundary: surface the error in the status message
        return None, f"❌ Error reading file: {str(e)}", gr.update()
# Function to switch between datasets
def switch_dataset(dataset_name):
    """Make a registered dataset the active one and refresh dependent widgets.

    Args:
        dataset_name: Key of the dataset in the module-level ``datasets``
            registry.

    Returns:
        A 7-tuple matching the wired outputs: preview (first 10 rows), a
        Markdown summary, updates for the X-axis, Y-axis, colour and size
        dropdowns, and the full DataFrame to store as current state. When the
        name is not registered, the preview and state are ``None``, the
        summary is "Dataset not found" and the dropdown updates are no-ops.
    """
    if dataset_name not in datasets:
        return None, "Dataset not found", gr.update(), gr.update(), gr.update(), gr.update(), None

    df = datasets[dataset_name]

    # Get column info
    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    all_cols = df.columns.tolist()

    def _summarize(cols, limit=5):
        # Column labels are not guaranteed to be strings (e.g. integer year
        # headers), so convert before joining to avoid a TypeError.
        shown = ', '.join(str(col) for col in cols[:limit])
        return shown + ('...' if len(cols) > limit else '')

    info = "\n".join([
        "",
        f"### Dataset: {dataset_name}",
        f"- **Rows**: {len(df)}",
        f"- **Columns**: {len(df.columns)}",
        f"- **Numeric columns**: {_summarize(numeric_cols)}",
        f"- **Categorical columns**: {_summarize(categorical_cols)}",
        "",
    ])

    return (
        df.head(10),  # Preview
        info,  # Info
        gr.update(choices=all_cols, value=all_cols[0] if all_cols else None),  # X-axis
        gr.update(choices=numeric_cols, value=numeric_cols[0] if numeric_cols else None),  # Y-axis
        gr.update(choices=[""] + categorical_cols, value=""),  # Color ("" means none)
        gr.update(choices=[""] + numeric_cols, value=""),  # Size ("" means none)
        df,  # Store current df
    )
# Dynamic plotting function
def create_plot(df, plot_type, x_col, y_col, color_col, size_col):
    """Build a Plotly figure for the current dataset and widget selections.

    Args:
        df: DataFrame to plot, or ``None`` when nothing is loaded.
        plot_type: One of "Scatter", "Line", "Bar", "Histogram", "Box" or
            "Heatmap".
        x_col: Column for the x axis; ``None`` yields no plot.
        y_col: Column for the y axis (unused by Histogram and Heatmap).
        color_col: Optional colour-grouping column; "" means none.
        size_col: Optional marker-size column (Scatter only); "" means none.

    Returns:
        The figure with its height set to 500, or ``None`` when there is
        nothing to plot: missing data or x column, unsupported plot type,
        fewer than two numeric columns for the heatmap, or a plotting error
        (which is printed rather than raised).
    """
    if df is None or x_col is None:
        return None

    # Handle empty string selections
    color_col = None if color_col == "" else color_col
    size_col = None if size_col == "" else size_col

    try:
        if plot_type == "Scatter":
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=size_col,
                             title=f"Scatter: {x_col} vs {y_col}")
        elif plot_type == "Line":
            fig = px.line(df, x=x_col, y=y_col, color=color_col,
                          title=f"Line: {x_col} vs {y_col}")
        elif plot_type == "Bar":
            # color=None is the same as omitting it, so one call covers both cases
            fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                         title=f"Bar: {x_col} vs {y_col}")
        elif plot_type == "Histogram":
            fig = px.histogram(df, x=x_col, color=color_col,
                               title=f"Histogram of {x_col}")
        elif plot_type == "Box":
            fig = px.box(df, x=x_col, y=y_col, color=color_col,
                         title=f"Box plot: {x_col} vs {y_col}")
        elif plot_type == "Heatmap":
            # Create correlation matrix for numeric columns
            numeric_df = df.select_dtypes(include=['number'])
            if len(numeric_df.columns) <= 1:
                return None
            fig = px.imshow(numeric_df.corr(), text_auto=True, title="Correlation Heatmap")
        else:
            # Handle this explicitly instead of relying on an unbound `fig`
            # raising inside the broad except below.
            print(f"Plot error: unsupported plot type {plot_type!r}")
            return None

        fig.update_layout(height=500)
        return fig
    except Exception as e:  # UI boundary: a bad column choice must not crash the app
        print(f"Plot error: {e}")
        return None
# Create the Gradio interface
with gr.Blocks(title="Dynamic Dataset Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        "\n# 📊 Dynamic Dataset Explorer\n"
        "Upload your own data or explore built-in datasets with automatic visualization\n"
    )

    # Hidden state to store current dataframe
    current_df = gr.State(value=default_df)

    with gr.Tabs():
        # Tab 1: Dataset Management
        with gr.TabItem("📁 Dataset Management"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Load Built-in Dataset")
                    builtin_choice = gr.Dropdown(
                        choices=["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"],
                        value="Gapminder",
                        label="Select Dataset",
                    )
                    load_builtin_btn = gr.Button("Load Dataset", variant="primary")

                    gr.Markdown("### Upload Custom Dataset")
                    file_upload = gr.File(label="Upload CSV or Excel", file_types=[".csv", ".xlsx", ".xls"])
                    custom_name = gr.Textbox(label="Dataset Name (optional)", placeholder="My Dataset")
                    upload_btn = gr.Button("Upload", variant="primary")

                    gr.Markdown("### Active Datasets")
                    dataset_selector = gr.Dropdown(
                        choices=list(datasets.keys()),
                        value="Gapminder",
                        label="Switch Dataset",
                    )
                with gr.Column(scale=2):
                    status_msg = gr.Markdown("Ready to load data")
                    data_info = gr.Markdown()
                    data_preview = gr.Dataframe(label="Data Preview (first 10 rows)")

        # Tab 2: Dynamic Visualization
        with gr.TabItem("📈 Visualization"):
            with gr.Row():
                with gr.Column(scale=1):
                    plot_type = gr.Radio(
                        choices=["Scatter", "Line", "Bar", "Histogram", "Box", "Heatmap"],
                        value="Scatter",
                        label="Plot Type",
                    )
                    x_axis = gr.Dropdown(label="X Axis", choices=[], interactive=True)
                    y_axis = gr.Dropdown(label="Y Axis", choices=[], interactive=True)
                    color_by = gr.Dropdown(label="Color By (optional)", choices=[], interactive=True)
                    size_by = gr.Dropdown(label="Size By (optional)", choices=[], interactive=True)
                    plot_btn = gr.Button("Create Plot", variant="primary")
                with gr.Column(scale=2):
                    plot_output = gr.Plot(label="Visualization")

        # Tab 3: Data Analysis
        with gr.TabItem("🔍 Data Analysis"):
            with gr.Row():
                with gr.Column():
                    analysis_type = gr.Radio(
                        choices=["Summary Statistics", "Missing Values", "Data Types", "Unique Values"],
                        value="Summary Statistics",
                        label="Analysis Type",
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")
                with gr.Column():
                    analysis_output = gr.Markdown()

    def analyze_data(df, analysis_type):
        """Return a Markdown report for the chosen analysis of ``df``.

        Tabular results are wrapped in a fenced code block so they render in
        a fixed-width font. Returns a plain message when no dataset is
        loaded, when there are no missing values, or when the analysis type
        is not recognised.
        """
        if df is None:
            return "No dataset loaded"
        if analysis_type == "Summary Statistics":
            return f"```\n{df.describe().to_string()}\n```"
        if analysis_type == "Missing Values":
            missing = df.isnull().sum()
            if not missing.any():
                return "No missing values!"
            # Only list the columns that actually have gaps
            return f"```\n{missing[missing > 0].to_string()}\n```"
        if analysis_type == "Data Types":
            return f"```\n{df.dtypes.to_string()}\n```"
        if analysis_type == "Unique Values":
            return f"```\n{df.nunique().to_string()}\n```"
        # Previously an unrecognised type silently returned None
        return f"Unknown analysis type: {analysis_type}"

    def _select_loaded_dataset(dataset_name):
        """Refresh the selector choices and select the dataset just loaded.

        Selecting it changes the selector's value, which is what triggers the
        switch handler; without this the preview would show the new dataset
        while plots and analysis kept using the previous one. If loading
        failed (name not registered) only the choices are refreshed.
        """
        choices = list(datasets.keys())
        if dataset_name in datasets:
            return gr.update(choices=choices, value=dataset_name)
        return gr.update(choices=choices)

    # Shared wiring lists, so every handler stays in sync with the signatures
    # of switch_dataset (outputs) and create_plot (inputs).
    dataset_outputs = [data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    plot_inputs = [current_df, plot_type, x_axis, y_axis, color_by, size_by]

    # Event handlers
    load_builtin_btn.click(
        load_builtin_dataset,
        inputs=[builtin_choice],
        outputs=[data_preview, status_msg],
    ).then(
        _select_loaded_dataset,
        inputs=[builtin_choice],
        outputs=[dataset_selector],
    )

    upload_btn.click(
        upload_dataset,
        inputs=[file_upload, custom_name],
        outputs=[data_preview, status_msg, dataset_selector],
    )

    # When dataset is switched, update everything
    dataset_selector.change(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=dataset_outputs,
    )

    # Create plot based on selections
    plot_btn.click(create_plot, inputs=plot_inputs, outputs=[plot_output])

    # Auto-update plot when parameters change
    for component in [plot_type, x_axis, y_axis, color_by, size_by]:
        component.change(create_plot, inputs=plot_inputs, outputs=[plot_output])

    # Analysis
    analyze_btn.click(
        analyze_data,
        inputs=[current_df, analysis_type],
        outputs=[analysis_output],
    )

    # Load initial dataset
    demo.load(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=dataset_outputs,
    )
if __name__ == "__main__":
    # Script entry point: serve locally (no public share link) with debug output on
    demo.launch(debug=True, share=False)