| import base64 |
| import io |
| import ast |
| import traceback |
| import os |
| from threading import Thread |
|
|
| import dash |
| from dash import dcc, html, Input, Output, State, callback_context |
| import dash_bootstrap_components as dbc |
| import pandas as pd |
| import plotly.graph_objs as go |
| import google.generativeai as genai |
|
|
| |
| app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP]) |
|
|
| |
| app.layout = dbc.Container([ |
| html.H1("Data Analysis Dashboard", className="my-4"), |
| dbc.Card([ |
| dbc.CardBody([ |
| dcc.Upload( |
| id='upload-data', |
| children=html.Div([ |
| 'Drag and Drop or ', |
| html.A('Select Files') |
| ]), |
| style={ |
| 'width': '100%', |
| 'height': '60px', |
| 'lineHeight': '60px', |
| 'borderWidth': '1px', |
| 'borderStyle': 'dashed', |
| 'borderRadius': '5px', |
| 'textAlign': 'center', |
| 'margin': '10px' |
| }, |
| multiple=False |
| ), |
| html.Div(id='upload-feedback', className="mt-2"), |
| html.Div([ |
| html.Span(id='filename-display', className="mr-2"), |
| dbc.Button("Delete File", id="delete-file-button", color="danger", className="mt-2", style={'display': 'none'}) |
| ], className="mt-2"), |
| dbc.Input(id="instructions", placeholder="Describe the analysis you want...", type="text", className="mt-3"), |
| dbc.Button("Generate Insights", id="submit-button", color="primary", className="mt-3"), |
| ]) |
| ], className="mb-4"), |
| html.Div(id="error-message", className="text-danger mb-3"), |
| dcc.Loading( |
| id="loading-visualizations", |
| type="default", |
| children=[ |
| dbc.Card([ |
| dbc.CardBody([ |
| dcc.Graph(id='visualization-1'), |
| dcc.Graph(id='visualization-2'), |
| dcc.Graph(id='visualization-3'), |
| ]) |
| ]) |
| ] |
| ), |
| dcc.Store(id='uploaded-data') |
| ], fluid=True) |
|
|
| def parse_contents(contents, filename): |
| content_type, content_string = contents.split(',') |
| decoded = base64.b64decode(content_string) |
| try: |
| if 'csv' in filename: |
| df = pd.read_csv(io.StringIO(decoded.decode('utf-8'))) |
| elif 'xls' in filename: |
| df = pd.read_excel(io.BytesIO(decoded)) |
| else: |
| return None |
| return df |
| except Exception as e: |
| print(e) |
| return None |
|
|
| def process_data(df, instructions): |
| try: |
| |
| api_key = os.getenv('GEMINI_API_KEY') |
| if not api_key: |
| raise ValueError("Gemini API key not found in environment variables") |
|
|
| |
| genai.configure(api_key=api_key) |
| model = genai.GenerativeModel('gemini-2.5-pro-preview-03-25') |
| |
| |
| response = model.generate_content(f""" |
| Analyze the following dataset and instructions: |
| |
| Data columns: {list(df.columns)} |
| Data shape: {df.shape} |
| Instructions: {instructions} |
| |
| Based on this, create 3 appropriate visualizations that provide meaningful insights. For each visualization: |
| 1. Choose the most suitable plot type (bar, line, scatter, hist, pie, heatmap) |
| 2. Determine appropriate data aggregation (e.g., top 5 categories, yearly averages) |
| 3. Select relevant columns for x-axis, y-axis, and any additional dimensions (color, size) |
| 4. Provide a clear, concise title that explains the insight |
| Consider data density and choose visualizations that simplify and clarify the information. |
| Limit the number of data points displayed to ensure readability (e.g., top 5, top 10, yearly). |
| |
| Return your response as a Python list of dictionaries: |
| [ |
| {{"title": "...", "plot_type": "...", "x": "...", "y": "...", "agg_func": "...", "top_n": ..., "additional": {{"color": "...", "size": "..."}}}}, |
| {{"title": "...", "plot_type": "...", "x": "...", "y": "...", "agg_func": "...", "top_n": ..., "additional": {{"color": "...", "size": "..."}}}}, |
| {{"title": "...", "plot_type": "...", "x": "...", "y": "...", "agg_func": "...", "top_n": ..., "additional": {{"color": "...", "size": "..."}}}} |
| ] |
| """) |
|
|
| |
| code_block = response.text |
| if '```python' in code_block: |
| code_block = code_block.split('```python')[1].split('```')[0].strip() |
| elif '```' in code_block: |
| code_block = code_block.split('```')[1].strip() |
| |
| plots = ast.literal_eval(code_block) |
| return plots |
| except Exception as e: |
| print(f"Error in process_data: {str(e)}") |
| return None |
|
|
| def generate_plot(df, plot_info): |
| plot_df = df.copy() |
| if plot_info['agg_func'] == 'sum': |
| plot_df = plot_df.groupby(plot_info['x'])[plot_info['y']].sum().reset_index() |
| elif plot_info['agg_func'] == 'mean': |
| plot_df = plot_df.groupby(plot_info['x'])[plot_info['y']].mean().reset_index() |
| elif plot_info['agg_func'] == 'count': |
| plot_df = plot_df.groupby(plot_info['x']).size().reset_index(name=plot_info['y']) |
| |
| if 'top_n' in plot_info and plot_info['top_n']: |
| plot_df = plot_df.nlargest(plot_info['top_n'], plot_info['y']) |
| |
| if plot_info['plot_type'] == 'bar': |
| fig = go.Figure(go.Bar(x=plot_df[plot_info['x']], y=plot_df[plot_info['y']])) |
| elif plot_info['plot_type'] == 'line': |
| fig = go.Figure(go.Scatter(x=plot_df[plot_info['x']], y=plot_df[plot_info['y']], mode='lines')) |
| elif plot_info['plot_type'] == 'scatter': |
| fig = go.Figure(go.Scatter(x=plot_df[plot_info['x']], y=plot_df[plot_info['y']], mode='markers')) |
| elif plot_info['plot_type'] == 'hist': |
| fig = go.Figure(go.Histogram(x=plot_df[plot_info['x']])) |
| elif plot_info['plot_type'] == 'pie': |
| fig = go.Figure(go.Pie(labels=plot_df[plot_info['x']], values=plot_df[plot_info['y']])) |
| elif plot_info['plot_type'] == 'heatmap': |
| pivot_df = plot_df.pivot(index=plot_info['x'], columns=plot_info['additional']['color'], values=plot_info['y']) |
| fig = go.Figure(go.Heatmap(z=pivot_df.values, x=pivot_df.columns, y=pivot_df.index)) |
| |
| fig.update_layout(title=plot_info['title'], xaxis_title=plot_info['x'], yaxis_title=plot_info['y']) |
| return fig |
|
|
| @app.callback( |
| [Output('upload-feedback', 'children'), |
| Output('filename-display', 'children'), |
| Output('delete-file-button', 'style'), |
| Output('uploaded-data', 'data')], |
| [Input('upload-data', 'contents'), |
| Input('delete-file-button', 'n_clicks')], |
| [State('upload-data', 'filename')] |
| ) |
| def update_upload_feedback(contents, delete_clicks, filename): |
| ctx = callback_context |
| if not ctx.triggered: |
| return dash.no_update, dash.no_update, dash.no_update, dash.no_update |
| |
| trigger_id = ctx.triggered[0]['prop_id'].split('.')[0] |
|
|
| if trigger_id == 'delete-file-button': |
| return "File deleted.", "", {'display': 'none'}, None |
| |
| if contents is not None: |
| df = parse_contents(contents, filename) |
| if df is not None: |
| return ( |
| dbc.Alert("File uploaded successfully!", color="success"), |
| f"Uploaded: {filename}", |
| {'display': 'inline-block'}, |
| contents |
| ) |
| else: |
| return ( |
| dbc.Alert("Error parsing the file. Please upload a valid CSV or Excel file.", color="danger"), |
| "", |
| {'display': 'none'}, |
| None |
| ) |
| |
| return dash.no_update, dash.no_update, dash.no_update, dash.no_update |
|
|
| @app.callback( |
| [Output('visualization-1', 'figure'), |
| Output('visualization-2', 'figure'), |
| Output('visualization-3', 'figure'), |
| Output('error-message', 'children')], |
| [Input('submit-button', 'n_clicks')], |
| [State('uploaded-data', 'data'), |
| State('upload-data', 'filename'), |
| State('instructions', 'value')] |
| ) |
| def update_output(n_clicks, contents, filename, instructions): |
| if n_clicks is None or contents is None: |
| return dash.no_update, dash.no_update, dash.no_update, "" |
|
|
| try: |
| df = parse_contents(contents, filename) |
| if df is None: |
| return dash.no_update, dash.no_update, dash.no_update, "Unable to parse the uploaded file." |
|
|
| plots = process_data(df, instructions) |
| if plots is None or len(plots) < 3: |
| return dash.no_update, dash.no_update, dash.no_update, "Unable to generate visualizations. Please check your instructions and try again." |
|
|
| figures = [generate_plot(df, plot_info) for plot_info in plots[:3]] |
| return figures[0], figures[1], figures[2], "" |
| except Exception as e: |
| error_message = f"An error occurred: {str(e)}" |
| return dash.no_update, dash.no_update, dash.no_update, error_message |
|
|
| if __name__ == '__main__': |
| print("Starting the Dash application...") |
| app.run(debug=False, host='0.0.0.0', port=7860) |
| print("Dash application has finished running.") |