Spaces:

mic3333
/

dash

Sleeping

App Files Files Community

dash / app.py

mic3333

Update app.py

ac0793a verified 8 months ago

raw

history blame contribute delete

12.3 kB

	import gradio as gr
	import plotly.express as px
	import pandas as pd
	import io

	# The default Gapminder table is fetched once at import time; it seeds both
	# the initial UI state and the dataset registry below.
	default_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')

	# In-memory registry of loaded DataFrames keyed by display name
	# (acts as our "database").
	datasets = {'Gapminder': default_df}

	# Function to load different built-in datasets
	def load_builtin_dataset(dataset_name):
	    """Load one of the built-in datasets and register it in ``datasets``.

	    Args:
	        dataset_name: Display name of the dataset to load. Recognised names
	            are "Gapminder", "Iris", "Tips", "Stock Data" and "Wind Data".

	    Returns:
	        A ``(df, message)`` tuple. ``df`` is the loaded DataFrame, or ``None``
	        when the name is not recognised or loading raised. ``message`` is a
	        human-readable status line for the UI.
	    """
	    def _load_stocks():
	        # Reshape from wide to long format for better analysis
	        wide = px.data.stocks()
	        long_df = wide.melt(id_vars='date', var_name='company', value_name='stock_price')
	        long_df['date'] = pd.to_datetime(long_df['date'])
	        return long_df

	    # Callables (not results) so that nothing is fetched until the requested
	    # loader is actually invoked.
	    loaders = {
	        "Gapminder": lambda: pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'),
	        "Iris": lambda: px.data.iris(),
	        "Tips": lambda: px.data.tips(),
	        "Stock Data": _load_stocks,
	        "Wind Data": lambda: px.data.wind(),
	    }

	    loader = loaders.get(dataset_name)
	    if loader is None:
	        # Always hand back the (df, message) pair; falling through and
	        # returning a bare None would not match the two declared outputs.
	        return None, f"❌ Unknown dataset: {dataset_name}"

	    try:
	        df = loader()
	        datasets[dataset_name] = df
	        return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
	    except Exception as e:
	        return None, f"❌ Error loading {dataset_name}: {str(e)}"

	# Function to handle file uploads
	def upload_dataset(file, custom_name):
	    """Read an uploaded CSV/Excel file and register it in ``datasets``.

	    Args:
	        file: The uploaded file as delivered by the upload component. Either
	            an object exposing the on-disk path as ``.name`` or a plain path
	            string is accepted. ``None`` means nothing was uploaded.
	        custom_name: Optional display name. When blank, the file name without
	            its final extension is used.

	    Returns:
	        A ``(df, message, selector_update)`` tuple: the loaded DataFrame (or
	        ``None`` on failure), a status message, and a ``gr.update`` for the
	        dataset selector.
	    """
	    import os

	    if file is None:
	        return None, "Please upload a file", gr.update(choices=list(datasets.keys()))

	    try:
	        # NOTE(review): which of the two forms arrives appears to depend on
	        # the Gradio version/configuration - confirm against the deployed one.
	        path = str(getattr(file, "name", file))
	        filename = os.path.basename(path)

	        # Match extensions case-insensitively so "DATA.CSV" is accepted too.
	        lowered = filename.lower()
	        if lowered.endswith('.csv'):
	            df = pd.read_csv(path)
	        elif lowered.endswith(('.xlsx', '.xls')):
	            df = pd.read_excel(path)
	        else:
	            return None, "❌ Unsupported file format. Please upload CSV or Excel.", gr.update()

	        # Store with custom name or filename. Only the final extension is
	        # dropped, so "my.data.csv" becomes "my.data" rather than "my".
	        requested = custom_name.strip() if custom_name else ""
	        dataset_name = requested or os.path.splitext(filename)[0]
	        datasets[dataset_name] = df

	        return df, f"✅ Uploaded {dataset_name}: {len(df)} rows, {len(df.columns)} columns", gr.update(choices=list(datasets.keys()), value=dataset_name)

	    except Exception as e:
	        return None, f"❌ Error reading file: {str(e)}", gr.update()

	# Function to switch between datasets
	def switch_dataset(dataset_name):
	    """Make a registered dataset the active one and refresh dependent widgets.

	    Args:
	        dataset_name: Key to look up in the ``datasets`` registry.

	    Returns:
	        A 7-tuple: preview (first 10 rows), Markdown info text, updates for
	        the X-axis, Y-axis, colour and size dropdowns, and the full DataFrame
	        to store as current state. When the name is not registered, the
	        preview and DataFrame are ``None``, the info text is
	        "Dataset not found" and the dropdown updates are no-ops.
	    """
	    if dataset_name not in datasets:
	        return None, "Dataset not found", gr.update(), gr.update(), gr.update(), gr.update(), None

	    df = datasets[dataset_name]

	    # Get column info
	    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
	    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
	    all_cols = df.columns.tolist()

	    def _summarise(cols, limit=5):
	        # str() each label: column labels are not guaranteed to be strings
	        # (e.g. integer headers), and str.join rejects non-strings.
	        shown = ', '.join(str(col) for col in cols[:limit])
	        return shown + ('...' if len(cols) > limit else '')

	    info = f"""
	### Dataset: {dataset_name}
	- Rows: {len(df)}
	- Columns: {len(df.columns)}
	- Numeric columns: {_summarise(numeric_cols)}
	- Categorical columns: {_summarise(categorical_cols)}
	"""

	    return (
	        df.head(10),  # Preview
	        info,  # Info
	        gr.update(choices=all_cols, value=all_cols[0] if all_cols else None),  # X-axis
	        gr.update(choices=numeric_cols, value=numeric_cols[0] if numeric_cols else None),  # Y-axis
	        gr.update(choices=[""] + categorical_cols, value=""),  # Color
	        gr.update(choices=[""] + numeric_cols, value=""),  # Size
	        df  # Store current df
	    )

	# Dynamic plotting function
	def create_plot(df, plot_type, x_col, y_col, color_col, size_col):
	    """Build a Plotly figure for the current dataset and widget selections.

	    Args:
	        df: DataFrame to plot, or ``None`` when nothing is loaded.
	        plot_type: One of "Scatter", "Line", "Bar", "Histogram", "Box" or
	            "Heatmap".
	        x_col: Column for the x axis. Not used by "Heatmap".
	        y_col: Column for the y axis. Not used by "Histogram" or "Heatmap".
	        color_col: Optional column to colour by; "" means none.
	        size_col: Optional column for marker size (scatter only); "" means none.

	    Returns:
	        The figure, or ``None`` when there is nothing to draw: no data, no x
	        column for a plot that needs one, fewer than two numeric columns for
	        the heatmap, an unknown plot type, or a plotting error (which is
	        printed).
	    """
	    if df is None:
	        return None

	    def _blank_to_none(selection):
	        # Dropdowns report "no selection" as an empty string.
	        return None if selection == "" else selection

	    try:
	        # Handle empty string selections
	        x_col = _blank_to_none(x_col)
	        y_col = _blank_to_none(y_col)
	        color_col = _blank_to_none(color_col)
	        size_col = _blank_to_none(size_col)

	        if plot_type == "Heatmap":
	            # Correlation matrix over numeric columns; independent of the
	            # axis selections, so it is not gated on x_col.
	            numeric_df = df.select_dtypes(include=['number'])
	            if len(numeric_df.columns) <= 1:
	                return None
	            corr = numeric_df.corr()
	            fig = px.imshow(corr, text_auto=True, title="Correlation Heatmap")

	        elif x_col is None:
	            # Every remaining plot type needs an x column.
	            return None

	        elif plot_type == "Scatter":
	            fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=size_col,
	                             title=f"Scatter: {x_col} vs {y_col}")

	        elif plot_type == "Line":
	            fig = px.line(df, x=x_col, y=y_col, color=color_col,
	                          title=f"Line: {x_col} vs {y_col}")

	        elif plot_type == "Bar":
	            # color=None is the same as omitting it, so one call covers both cases.
	            fig = px.bar(df, x=x_col, y=y_col, color=color_col,
	                         title=f"Bar: {x_col} vs {y_col}")

	        elif plot_type == "Histogram":
	            fig = px.histogram(df, x=x_col, color=color_col,
	                               title=f"Histogram of {x_col}")

	        elif plot_type == "Box":
	            fig = px.box(df, x=x_col, y=y_col, color=color_col,
	                         title=f"Box plot: {x_col} vs {y_col}")

	        else:
	            # Unknown plot type: nothing to draw.
	            return None

	        fig.update_layout(height=500)
	        return fig

	    except Exception as e:
	        # Best-effort: a bad column combination should clear the plot, not
	        # crash the event handler.
	        print(f"Plot error: {e}")
	        return None

	# Create the Gradio interface
	# Builds the three-tab UI (dataset management, visualization, analysis) and
	# wires the widgets to the handler functions defined above.
	with gr.Blocks(title="Dynamic Dataset Explorer", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("""
	# 📊 Dynamic Dataset Explorer
	Upload your own data or explore built-in datasets with automatic visualization
	""")

	    # Hidden state to store current dataframe
	    current_df = gr.State(value=default_df)

	    with gr.Tabs():
	        # Tab 1: Dataset Management
	        with gr.TabItem("📁 Dataset Management"):
	            with gr.Row():
	                with gr.Column(scale=1):
	                    gr.Markdown("### Load Built-in Dataset")
	                    builtin_choice = gr.Dropdown(
	                        choices=["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"],
	                        value="Gapminder",
	                        label="Select Dataset"
	                    )
	                    load_builtin_btn = gr.Button("Load Dataset", variant="primary")

	                    gr.Markdown("### Upload Custom Dataset")
	                    file_upload = gr.File(label="Upload CSV or Excel", file_types=[".csv", ".xlsx", ".xls"])
	                    custom_name = gr.Textbox(label="Dataset Name (optional)", placeholder="My Dataset")
	                    upload_btn = gr.Button("Upload", variant="primary")

	                    gr.Markdown("### Active Datasets")
	                    dataset_selector = gr.Dropdown(
	                        choices=list(datasets.keys()),
	                        value="Gapminder",
	                        label="Switch Dataset"
	                    )

	                with gr.Column(scale=2):
	                    status_msg = gr.Markdown("Ready to load data")
	                    data_info = gr.Markdown()
	                    data_preview = gr.Dataframe(label="Data Preview (first 10 rows)")

	        # Tab 2: Dynamic Visualization
	        with gr.TabItem("📈 Visualization"):
	            with gr.Row():
	                with gr.Column(scale=1):
	                    plot_type = gr.Radio(
	                        choices=["Scatter", "Line", "Bar", "Histogram", "Box", "Heatmap"],
	                        value="Scatter",
	                        label="Plot Type"
	                    )

	                    x_axis = gr.Dropdown(label="X Axis", choices=[], interactive=True)
	                    y_axis = gr.Dropdown(label="Y Axis", choices=[], interactive=True)
	                    color_by = gr.Dropdown(label="Color By (optional)", choices=[], interactive=True)
	                    size_by = gr.Dropdown(label="Size By (optional)", choices=[], interactive=True)

	                    plot_btn = gr.Button("Create Plot", variant="primary")

	                with gr.Column(scale=2):
	                    plot_output = gr.Plot(label="Visualization")

	        # Tab 3: Data Analysis
	        with gr.TabItem("🔍 Data Analysis"):
	            with gr.Row():
	                with gr.Column():
	                    analysis_type = gr.Radio(
	                        choices=["Summary Statistics", "Missing Values", "Data Types", "Unique Values"],
	                        value="Summary Statistics",
	                        label="Analysis Type"
	                    )
	                    analyze_btn = gr.Button("Analyze", variant="primary")

	                with gr.Column():
	                    analysis_output = gr.Markdown()

	    def analyze_data(df, analysis_type):
	        """Return a Markdown report about ``df`` for the chosen analysis type.

	        Args:
	            df: DataFrame to analyse, or ``None`` when nothing is loaded.
	            analysis_type: "Summary Statistics", "Missing Values",
	                "Data Types" or "Unique Values".

	        Returns:
	            A Markdown string: a fenced text table, or a short message when
	            there is no dataset, no missing values, or the analysis type is
	            not recognised.
	        """
	        if df is None:
	            return "No dataset loaded"

	        if analysis_type == "Summary Statistics":
	            return f"```\n{df.describe().to_string()}\n```"
	        if analysis_type == "Missing Values":
	            missing = df.isnull().sum()
	            return f"```\n{missing[missing > 0].to_string()}\n```" if missing.any() else "No missing values!"
	        if analysis_type == "Data Types":
	            return f"```\n{df.dtypes.to_string()}\n```"
	        if analysis_type == "Unique Values":
	            unique_counts = df.nunique()
	            return f"```\n{unique_counts.to_string()}\n```"

	        # Explicit message instead of an implicit None for unexpected input.
	        return f"Unknown analysis type: {analysis_type}"

	    def _select_loaded_dataset(loaded_name):
	        """Refresh the selector's choices and select the dataset just loaded."""
	        choices = list(datasets.keys())
	        if loaded_name in datasets:
	            # Setting the value makes the selector's change handler run, which
	            # activates the dataset (preview, axis choices, current_df), the
	            # same way the upload path does.
	            return gr.update(choices=choices, value=loaded_name)
	        # Load failed: keep the current selection, only refresh the choices.
	        return gr.update(choices=choices)

	    # Shared input/output lists so the handlers below cannot drift apart.
	    switch_outputs = [data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
	    plot_inputs = [current_df, plot_type, x_axis, y_axis, color_by, size_by]

	    # Event handlers
	    load_builtin_btn.click(
	        load_builtin_dataset,
	        inputs=[builtin_choice],
	        outputs=[data_preview, status_msg]
	    ).then(
	        _select_loaded_dataset,
	        inputs=[builtin_choice],
	        outputs=[dataset_selector]
	    )

	    upload_btn.click(
	        upload_dataset,
	        inputs=[file_upload, custom_name],
	        outputs=[data_preview, status_msg, dataset_selector]
	    )

	    # When dataset is switched, update everything
	    dataset_selector.change(
	        switch_dataset,
	        inputs=[dataset_selector],
	        outputs=switch_outputs
	    )

	    # Create plot based on selections
	    plot_btn.click(
	        create_plot,
	        inputs=plot_inputs,
	        outputs=[plot_output]
	    )

	    # Auto-update plot when parameters change
	    for component in [plot_type, x_axis, y_axis, color_by, size_by]:
	        component.change(
	            create_plot,
	            inputs=plot_inputs,
	            outputs=[plot_output]
	        )

	    # Analysis
	    analyze_btn.click(
	        analyze_data,
	        inputs=[current_df, analysis_type],
	        outputs=[analysis_output]
	    )

	    # Load initial dataset
	    demo.load(
	        switch_dataset,
	        inputs=[dataset_selector],
	        outputs=switch_outputs
	    )

	if __name__ == "__main__":
	    demo.launch(share=False, debug=True)