mic3333 committed on
Commit
ac0793a
·
verified ·
1 Parent(s): f5b5911

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +296 -24
app.py CHANGED
@@ -1,26 +1,298 @@
1
- from dash import Dash, html, dcc, callback, Output, Input
2
  import plotly.express as px
3
  import pandas as pd
4
- import os
5
-
6
- df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
7
- app = Dash(__name__)
8
- server = app.server # Important for deployment
9
-
10
- app.layout = [
11
- html.H1(children='Title of Dash App', style={'textAlign':'center'}),
12
- dcc.Dropdown(df.country.unique(), 'Canada', id='dropdown-selection'),
13
- dcc.Graph(id='graph-content')
14
- ]
15
-
16
- @callback(
17
- Output('graph-content', 'figure'),
18
- Input('dropdown-selection', 'value')
19
- )
20
- def update_graph(value):
21
- dff = df[df.country==value]
22
- return px.line(dff, x='year', y='pop')
23
-
24
- if __name__ == '__main__':
25
- port = int(os.environ.get('PORT', 7860))
26
- app.run(host='0.0.0.0', port=port, debug=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
  import plotly.express as px
3
  import pandas as pd
4
+ import io
5
+
6
# Gapminder is downloaded once at import time and is the dataset shown on startup.
default_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')

# In-memory registry of loaded DataFrames keyed by display name
# (acts as our "database").
datasets = {'Gapminder': default_df}
12
+
13
def _load_stock_data():
    """Return plotly's stocks sample reshaped from wide to long format."""
    wide = px.data.stocks()
    # Reshape from wide to long format for better analysis
    long_df = wide.melt(id_vars='date', var_name='company', value_name='stock_price')
    long_df['date'] = pd.to_datetime(long_df['date'])
    return long_df


# Loaders for the built-in datasets, keyed by the name offered in the UI.
# Each entry is a zero-argument callable so nothing is fetched until requested.
_BUILTIN_LOADERS = {
    "Gapminder": lambda: pd.read_csv(
        'https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv'
    ),
    "Iris": lambda: px.data.iris(),
    "Tips": lambda: px.data.tips(),
    "Stock Data": _load_stock_data,
    "Wind Data": lambda: px.data.wind(),
}


# Function to load different built-in datasets
def load_builtin_dataset(dataset_name):
    """Load one of the built-in datasets and register it in ``datasets``.

    Args:
        dataset_name: One of the keys of ``_BUILTIN_LOADERS``
            ("Gapminder", "Iris", "Tips", "Stock Data", "Wind Data").

    Returns:
        A ``(df, message)`` pair. ``df`` is the loaded DataFrame, or ``None``
        when the name is unknown or loading failed; ``message`` is a status
        line for display.
    """
    loader = _BUILTIN_LOADERS.get(dataset_name)
    if loader is None:
        # Always return the 2-tuple shape, even for names we do not know;
        # falling through would hand callers a bare None.
        return None, f"❌ Unknown dataset: {dataset_name}"

    try:
        df = loader()
    except Exception as e:
        # UI boundary: report the failure as a status message instead of raising.
        return None, f"❌ Error loading {dataset_name}: {str(e)}"

    datasets[dataset_name] = df
    return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
47
+
48
# Function to handle file uploads
def upload_dataset(file, custom_name):
    """Read an uploaded CSV/Excel file and register it in ``datasets``.

    Args:
        file: The value delivered by the upload component, or ``None`` when
            nothing was uploaded. Its path is read from ``file.name`` when
            that attribute exists, otherwise ``file`` itself is used as the
            path (NOTE(review): which form Gradio passes depends on its
            version/configuration -- confirm).
        custom_name: Optional display name; blank or whitespace-only values
            fall back to the file name without its extension.

    Returns:
        A ``(df, message, selector_update)`` triple: the loaded DataFrame (or
        ``None`` on failure), a status message, and a ``gr.update`` for the
        dataset selector.
    """
    from pathlib import Path  # local import keeps this block self-contained

    if file is None:
        return None, "Please upload a file", gr.update(choices=list(datasets.keys()))

    try:
        path = Path(getattr(file, "name", file))
        # Compare the extension case-insensitively so "DATA.CSV" is accepted.
        suffix = path.suffix.lower()

        # Determine file type and read accordingly
        if suffix == '.csv':
            df = pd.read_csv(path)
        elif suffix in ('.xlsx', '.xls'):
            df = pd.read_excel(path)
        else:
            return None, "❌ Unsupported file format. Please upload CSV or Excel.", gr.update()

        # Store with custom name or filename; Path.stem drops only the final
        # extension and works with any path separator.
        dataset_name = (custom_name or "").strip() or path.stem
        datasets[dataset_name] = df

        return (
            df,
            f"✅ Uploaded {dataset_name}: {len(df)} rows, {len(df.columns)} columns",
            gr.update(choices=list(datasets.keys()), value=dataset_name),
        )

    except Exception as e:
        # UI boundary: report the failure as a status message instead of raising.
        return None, f"❌ Error reading file: {str(e)}", gr.update()
71
+
72
def _summarize_columns(columns, limit=5):
    """Join up to ``limit`` column labels, appending '...' when more exist."""
    # Labels are converted with str() because they are not guaranteed to be
    # strings (e.g. integer headers), and str.join rejects non-strings.
    shown = ', '.join(str(label) for label in columns[:limit])
    return shown + ('...' if len(columns) > limit else '')


# Function to switch between datasets
def switch_dataset(dataset_name):
    """Switch to a different dataset and refresh everything derived from it.

    Args:
        dataset_name: Key of the dataset in ``datasets``.

    Returns:
        A 7-tuple: preview (first 10 rows), Markdown info text, ``gr.update``
        objects for the X-axis, Y-axis, color and size dropdowns, and the full
        DataFrame to store as the current dataset. When ``dataset_name`` is
        not registered, the preview and DataFrame are ``None``, the info text
        is "Dataset not found", and the dropdown updates are no-ops.
    """
    if dataset_name not in datasets:
        return None, "Dataset not found", gr.update(), gr.update(), gr.update(), gr.update(), None

    df = datasets[dataset_name]

    # Get column info
    numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
    categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
    all_cols = df.columns.tolist()

    info = "\n".join([
        "",
        f"### Dataset: {dataset_name}",
        f"- **Rows**: {len(df)}",
        f"- **Columns**: {len(df.columns)}",
        f"- **Numeric columns**: {_summarize_columns(numeric_cols)}",
        f"- **Categorical columns**: {_summarize_columns(categorical_cols)}",
        "",
    ])

    return (
        df.head(10),  # Preview
        info,  # Info
        gr.update(choices=all_cols, value=all_cols[0] if all_cols else None),  # X-axis
        gr.update(choices=numeric_cols, value=numeric_cols[0] if numeric_cols else None),  # Y-axis
        gr.update(choices=[""] + categorical_cols, value=""),  # Color ("" = none)
        gr.update(choices=[""] + numeric_cols, value=""),  # Size ("" = none)
        df,  # Store current df
    )
101
+
102
# Dynamic plotting function
def create_plot(df, plot_type, x_col, y_col, color_col, size_col):
    """Create a plotly figure for the current dataset and selections.

    Args:
        df: DataFrame to plot, or ``None`` when no dataset is loaded.
        plot_type: One of "Scatter", "Line", "Bar", "Histogram", "Box",
            "Heatmap".
        x_col: Column for the X axis; required for every type except
            "Heatmap", which ignores it.
        y_col: Column for the Y axis (unused by "Histogram" and "Heatmap").
        color_col: Optional grouping column; ``""`` means "none".
        size_col: Optional marker-size column (Scatter only); ``""`` means
            "none".

    Returns:
        The figure, or ``None`` when there is nothing to plot: no dataset, no
        X column for a type that needs one, fewer than two numeric columns
        for a heatmap, an unknown plot type, or a plotting error.
    """
    if df is None:
        return None

    try:
        # Handle empty string selections
        color_col = None if color_col == "" else color_col
        size_col = None if size_col == "" else size_col

        if plot_type == "Heatmap":
            # Correlation matrix of the numeric columns; independent of the
            # axis selections, so it is handled before the x_col check.
            numeric_df = df.select_dtypes(include=['number'])
            if len(numeric_df.columns) < 2:
                return None
            fig = px.imshow(numeric_df.corr(), text_auto=True, title="Correlation Heatmap")

        elif x_col is None:
            return None

        elif plot_type == "Scatter":
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=size_col,
                             title=f"Scatter: {x_col} vs {y_col}")

        elif plot_type == "Line":
            fig = px.line(df, x=x_col, y=y_col, color=color_col,
                          title=f"Line: {x_col} vs {y_col}")

        elif plot_type == "Bar":
            # color=None is the same as omitting it, so one call covers both cases.
            fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                         title=f"Bar: {x_col} vs {y_col}")

        elif plot_type == "Histogram":
            fig = px.histogram(df, x=x_col, color=color_col,
                               title=f"Histogram of {x_col}")

        elif plot_type == "Box":
            fig = px.box(df, x=x_col, y=y_col, color=color_col,
                         title=f"Box plot: {x_col} vs {y_col}")

        else:
            # Report unknown types explicitly instead of relying on an
            # unbound-variable error being swallowed below.
            print(f"Plot error: unknown plot type {plot_type!r}")
            return None

        fig.update_layout(height=500)
        return fig

    except Exception as e:
        # UI boundary: a bad column combination should clear the plot, not crash.
        print(f"Plot error: {e}")
        return None
154
+
155
def analyze_data(df, analysis_type):
    """Perform different types of data analysis and return Markdown text.

    Args:
        df: DataFrame to analyze, or ``None`` when no dataset is loaded.
        analysis_type: "Summary Statistics", "Missing Values", "Data Types"
            or "Unique Values".

    Returns:
        A Markdown string: a fenced text table for the requested analysis, or
        a short plain message when there is nothing to show.
    """
    if df is None:
        return "No dataset loaded"

    if analysis_type == "Summary Statistics":
        # DataFrame.describe() raises ValueError on a frame without columns.
        if len(df.columns) == 0:
            return "No columns to summarize"
        return f"```\n{df.describe().to_string()}\n```"
    if analysis_type == "Missing Values":
        missing = df.isnull().sum()
        return f"```\n{missing[missing > 0].to_string()}\n```" if missing.any() else "No missing values!"
    if analysis_type == "Data Types":
        return f"```\n{df.dtypes.to_string()}\n```"
    if analysis_type == "Unique Values":
        unique_counts = df.nunique()
        return f"```\n{unique_counts.to_string()}\n```"

    # Always return text so the Markdown output never receives None.
    return f"Unknown analysis type: {analysis_type}"


def _refresh_dataset_selector(loaded_name):
    """Refresh the selector's choices and select ``loaded_name`` if registered.

    Selecting the freshly loaded dataset changes the selector's value, so its
    ``change`` handler re-syncs the preview, the axis dropdowns and the stored
    DataFrame -- the same mechanism the upload path relies on. When the load
    failed (name not in ``datasets``) only the choices are refreshed.
    """
    if loaded_name in datasets:
        return gr.update(choices=list(datasets.keys()), value=loaded_name)
    return gr.update(choices=list(datasets.keys()))


# Create the Gradio interface
with gr.Blocks(title="Dynamic Dataset Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📊 Dynamic Dataset Explorer
    Upload your own data or explore built-in datasets with automatic visualization
    """)

    # Hidden state to store current dataframe
    current_df = gr.State(value=default_df)

    with gr.Tabs():
        # Tab 1: Dataset Management
        with gr.TabItem("📁 Dataset Management"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Load Built-in Dataset")
                    builtin_choice = gr.Dropdown(
                        choices=["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"],
                        value="Gapminder",
                        label="Select Dataset"
                    )
                    load_builtin_btn = gr.Button("Load Dataset", variant="primary")

                    gr.Markdown("### Upload Custom Dataset")
                    file_upload = gr.File(label="Upload CSV or Excel", file_types=[".csv", ".xlsx", ".xls"])
                    custom_name = gr.Textbox(label="Dataset Name (optional)", placeholder="My Dataset")
                    upload_btn = gr.Button("Upload", variant="primary")

                    gr.Markdown("### Active Datasets")
                    dataset_selector = gr.Dropdown(
                        choices=list(datasets.keys()),
                        value="Gapminder",
                        label="Switch Dataset"
                    )

                with gr.Column(scale=2):
                    status_msg = gr.Markdown("Ready to load data")
                    data_info = gr.Markdown()
                    data_preview = gr.Dataframe(label="Data Preview (first 10 rows)")

        # Tab 2: Dynamic Visualization
        with gr.TabItem("📈 Visualization"):
            with gr.Row():
                with gr.Column(scale=1):
                    plot_type = gr.Radio(
                        choices=["Scatter", "Line", "Bar", "Histogram", "Box", "Heatmap"],
                        value="Scatter",
                        label="Plot Type"
                    )

                    x_axis = gr.Dropdown(label="X Axis", choices=[], interactive=True)
                    y_axis = gr.Dropdown(label="Y Axis", choices=[], interactive=True)
                    color_by = gr.Dropdown(label="Color By (optional)", choices=[], interactive=True)
                    size_by = gr.Dropdown(label="Size By (optional)", choices=[], interactive=True)

                    plot_btn = gr.Button("Create Plot", variant="primary")

                with gr.Column(scale=2):
                    plot_output = gr.Plot(label="Visualization")

        # Tab 3: Data Analysis
        with gr.TabItem("🔍 Data Analysis"):
            with gr.Row():
                with gr.Column():
                    analysis_type = gr.Radio(
                        choices=["Summary Statistics", "Missing Values", "Data Types", "Unique Values"],
                        value="Summary Statistics",
                        label="Analysis Type"
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")

                with gr.Column():
                    analysis_output = gr.Markdown()

    # Component groups shared by several handlers below.
    dataset_view_outputs = [data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    plot_inputs = [current_df, plot_type, x_axis, y_axis, color_by, size_by]

    # Event handlers
    load_builtin_btn.click(
        load_builtin_dataset,
        inputs=[builtin_choice],
        outputs=[data_preview, status_msg]
    ).then(
        # Select the dataset that was just loaded; refreshing the choices
        # alone would leave current_df and the axis dropdowns on the
        # previously selected dataset.
        _refresh_dataset_selector,
        inputs=[builtin_choice],
        outputs=[dataset_selector]
    )

    upload_btn.click(
        upload_dataset,
        inputs=[file_upload, custom_name],
        outputs=[data_preview, status_msg, dataset_selector]
    )

    # When dataset is switched, update everything
    dataset_selector.change(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=dataset_view_outputs
    )

    # Create plot based on selections
    plot_btn.click(
        create_plot,
        inputs=plot_inputs,
        outputs=[plot_output]
    )

    # Auto-update plot when parameters change
    for component in [plot_type, x_axis, y_axis, color_by, size_by]:
        component.change(
            create_plot,
            inputs=plot_inputs,
            outputs=[plot_output]
        )

    # Analysis
    analyze_btn.click(
        analyze_data,
        inputs=[current_df, analysis_type],
        outputs=[analysis_output]
    )

    # Load initial dataset
    demo.load(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=dataset_view_outputs
    )
296
+
297
if __name__ == "__main__":
    # Start the app only when run as a script; no public share link is requested.
    demo.launch(debug=True, share=False)