sanjaystarc commited on
Commit
e1c1a1e
·
verified ·
1 Parent(s): 361e559

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +515 -0
  2. core_agent.py +408 -0
  3. requirements.txt +14 -0
  4. sample_data.csv +31 -0
app.py ADDED
@@ -0,0 +1,515 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app.py
3
+ ======
4
+ Streamlit UI — Data Analyst Agent (LangChain + Gemini)
5
+ Run: streamlit run app.py
6
+ """
7
+
8
+ import os
9
+ import io
10
+ import json
11
+ import streamlit as st
12
+ import pandas as pd
13
+ import plotly.express as px
14
+
15
+ from core_agent import (
16
+ get_llm, load_file, profile_dataframe, profile_to_text,
17
+ set_dataframe, build_agent, run_agent,
18
+ auto_suggest_charts, make_plotly_chart, recommend_chart
19
+ )
20
+
21
# ─── Page Config ──────────────────────────────────────────────────────────────
st.set_page_config(
    page_title="DataMind Agent",
    page_icon="🧠",
    layout="wide",
    initial_sidebar_state="expanded",
)

# ─── Custom CSS ───────────────────────────────────────────────────────────────
# The whole theme lives in one injected <style> block; kept in a named
# constant so the markdown call below stays readable.
_APP_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Syne:wght@400;700;800&family=DM+Sans:wght@300;400;500&display=swap');

html, body, [class*="css"] {
    font-family: 'DM Sans', sans-serif;
    background-color: #0a0a12;
    color: #e8e8ff;
}

.main { background-color: #0a0a12 !important; }

/* Header */
.hero-title {
    font-family: 'Syne', sans-serif;
    font-size: 2.8rem;
    font-weight: 800;
    background: linear-gradient(135deg, #e8e8ff 0%, #6C63FF 50%, #43E97B 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    background-clip: text;
    margin-bottom: 0.2rem;
}
.hero-sub {
    color: #6a6a9a;
    font-size: 1rem;
    margin-bottom: 2rem;
}

/* Cards */
.stat-card {
    background: #1a1a2e;
    border: 1px solid #2a2a45;
    border-radius: 16px;
    padding: 1.2rem 1.5rem;
    text-align: center;
}
.stat-num {
    font-family: 'Syne', sans-serif;
    font-size: 2rem;
    font-weight: 800;
    color: #6C63FF;
}
.stat-label { color: #6a6a9a; font-size: 0.8rem; text-transform: uppercase; letter-spacing: 0.1em; }

/* Chat bubbles */
.user-bubble {
    background: rgba(108,99,255,0.15);
    border: 1px solid rgba(108,99,255,0.3);
    border-radius: 18px 18px 4px 18px;
    padding: 0.9rem 1.2rem;
    margin: 0.5rem 0;
    font-size: 0.95rem;
}
.agent-bubble {
    background: #1a1a2e;
    border: 1px solid #2a2a45;
    border-radius: 18px 18px 18px 4px;
    padding: 0.9rem 1.2rem;
    margin: 0.5rem 0;
    font-size: 0.95rem;
    line-height: 1.6;
}

/* Sidebar */
section[data-testid="stSidebar"] {
    background: #10101e !important;
    border-right: 1px solid #2a2a45;
}

/* Buttons */
.stButton > button {
    background: linear-gradient(135deg, #6C63FF, #43E97B);
    color: white;
    border: none;
    border-radius: 12px;
    font-family: 'Syne', sans-serif;
    font-weight: 700;
    padding: 0.6rem 1.5rem;
    transition: opacity 0.2s, transform 0.2s;
}
.stButton > button:hover { opacity: 0.85; color: white; transform: translateY(-1px); }

.stTextInput > div > div > input {
    background: #1a1a2e;
    border: 1px solid #2a2a45;
    border-radius: 12px;
    color: #e8e8ff;
}
.stSelectbox > div > div {
    background: #1a1a2e;
    border: 1px solid #2a2a45;
    border-radius: 12px;
}

/* Tabs */
.stTabs [data-baseweb="tab-list"] {
    background: #10101e;
    border-radius: 12px;
    gap: 0.3rem;
    padding: 0.3rem;
}
.stTabs [data-baseweb="tab"] {
    background: transparent;
    color: #6a6a9a;
    border-radius: 10px;
    font-family: 'Syne', sans-serif;
}
.stTabs [aria-selected="true"] {
    background: rgba(108,99,255,0.2) !important;
    color: #6C63FF !important;
}

/* Dataframe */
.stDataFrame { border-radius: 12px; overflow: hidden; }

/* Info / success boxes */
.stAlert { border-radius: 12px; }
</style>"""

st.markdown(_APP_CSS, unsafe_allow_html=True)
149
+
150
# ─── Session State ────────────────────────────────────────────────────────────
# One-time initialisation of every session key the app reads later.
_SESSION_DEFAULTS = {
    "df": None,                # loaded DataFrame
    "profile": None,           # dict built by profile_dataframe()
    "file_type": None,         # "CSV" / "Excel" / "JSON"
    "chat_history": [],        # list of {"user", "agent", "steps"} turns
    "llm": None,               # Gemini chat model instance
    "agent_executor": None,    # LangChain AgentExecutor
    "api_key_set": False,      # True once a key connected successfully
}
for _key, _default in _SESSION_DEFAULTS.items():
    if _key not in st.session_state:
        st.session_state[_key] = _default
162
+
163
+
164
# ─── Sidebar ──────────────────────────────────────────────────────────────────
# API-key entry, file upload, and usage instructions.
with st.sidebar:
    st.markdown("### 🧠 DataMind Agent")
    st.markdown("---")

    # API Key — only (re)build the LLM + agent when the key actually changes,
    # so an ordinary rerun does not reconnect.
    st.markdown("**🔑 Gemini API Key**")
    api_key = st.text_input(
        "Enter your key", type="password",
        placeholder="AIza...",
        help="Get free key at aistudio.google.com",
        label_visibility="collapsed"
    )
    if api_key:
        if not st.session_state.api_key_set or st.session_state.get("_last_key") != api_key:
            try:
                st.session_state.llm = get_llm(api_key)
                st.session_state.agent_executor = build_agent(st.session_state.llm)
                st.session_state.api_key_set = True
                st.session_state["_last_key"] = api_key
                st.success("✅ Connected to Gemini!")
            except Exception as e:
                st.error(f"❌ Invalid key: {e}")

    st.markdown("---")

    # File Upload
    st.markdown("**📁 Upload Data File**")
    uploaded = st.file_uploader(
        "Upload", type=["csv", "xlsx", "xls", "json"],
        label_visibility="collapsed"
    )

    if uploaded and st.session_state.api_key_set:
        # BUGFIX: this branch used to execute on *every* Streamlit rerun while
        # a file was attached, re-parsing the upload and resetting
        # chat_history — which wiped the conversation after each interaction.
        # Guard on a (name, size) signature so we only load a new/changed file.
        file_sig = (uploaded.name, uploaded.size)
        if st.session_state.get("_last_file") != file_sig:
            with st.spinner("📊 Analyzing your data..."):
                try:
                    df, ftype = load_file(uploaded)
                    profile = profile_dataframe(df)
                    st.session_state.df = df
                    st.session_state.file_type = ftype
                    st.session_state.profile = profile
                    st.session_state.chat_history = []
                    set_dataframe(df, profile)
                    st.session_state["_last_file"] = file_sig
                    st.success(f"✅ Loaded {ftype} file!")
                except Exception as e:
                    st.error(f"❌ Error: {e}")

    elif uploaded and not st.session_state.api_key_set:
        st.warning("⚠️ Enter your Gemini API key first")

    st.markdown("---")
    st.markdown("""
**How to use:**
1. Paste your Gemini API key above
2. Upload CSV, Excel, or JSON file
3. Explore the Dashboard tab
4. Ask questions in Chat tab
5. Generate visuals in Charts tab

---
**Get free Gemini API key:**
[aistudio.google.com](https://aistudio.google.com/app/apikey)
""")
227
+
228
+
229
# ─── Main Content ─────────────────────────────────────────────────────────────
st.markdown('<div class="hero-title">🧠 DataMind Agent</div>', unsafe_allow_html=True)
st.markdown('<div class="hero-sub">AI-powered data analysis using LangChain + Gemini · Upload any data file and start exploring</div>', unsafe_allow_html=True)

if st.session_state.df is None:
    # Landing state: three feature cards plus a pointer to the sidebar.
    _features = [
        ("📂", "CSV, Excel, JSON",
         "Upload any tabular data file — we handle the parsing automatically"),
        ("💬", "Natural Language Q&A",
         "Ask anything about your data in plain English — no SQL needed"),
        ("📊", "Smart Visualizations",
         "AI picks the right chart for your question automatically"),
    ]
    for column, (icon, label, blurb) in zip(st.columns(3), _features):
        with column:
            st.markdown(f"""
<div class="stat-card">
<div class="stat-num">{icon}</div>
<div class="stat-label">{label}</div>
<br><p style="color:#6a6a9a; font-size:0.85rem">{blurb}</p>
</div>""", unsafe_allow_html=True)

    st.markdown("<br>", unsafe_allow_html=True)
    st.info("👈 Enter your Gemini API key and upload a data file in the sidebar to get started!")
260
+
261
+ else:
262
+ df = st.session_state.df
263
+ profile = st.session_state.profile
264
+ llm = st.session_state.llm
265
+
266
+ # ── Tabs ─────────────────────────────────────────────────────────────────
267
+ tab1, tab2, tab3, tab4 = st.tabs(["📊 Dashboard", "💬 Chat", "🎨 Charts", "🔍 Raw Data"])
268
+
269
+ # ════════════════════════════════════════════════════════════════
270
+ # TAB 1 — Dashboard
271
+ # ════════════════════════════════════════════════════════════════
272
+ with tab1:
273
+ rows, cols = profile["shape"]
274
+ nulls = sum(profile["null_counts"].values())
275
+ num_c = len(profile["numeric_columns"])
276
+ cat_c = len(profile["categorical_columns"])
277
+
278
+ c1, c2, c3, c4 = st.columns(4)
279
+ c1.markdown(f'<div class="stat-card"><div class="stat-num">{rows:,}</div><div class="stat-label">Rows</div></div>', unsafe_allow_html=True)
280
+ c2.markdown(f'<div class="stat-card"><div class="stat-num">{cols}</div><div class="stat-label">Columns</div></div>', unsafe_allow_html=True)
281
+ c3.markdown(f'<div class="stat-card"><div class="stat-num">{num_c}</div><div class="stat-label">Numeric Cols</div></div>', unsafe_allow_html=True)
282
+ c4.markdown(f'<div class="stat-card"><div class="stat-num">{nulls}</div><div class="stat-label">Missing Values</div></div>', unsafe_allow_html=True)
283
+
284
+ st.markdown("<br>", unsafe_allow_html=True)
285
+
286
+ # Column overview
287
+ st.markdown("#### 📋 Column Overview")
288
+ col_info = pd.DataFrame({
289
+ "Column": df.columns,
290
+ "Type": df.dtypes.astype(str).values,
291
+ "Non-Null": df.notnull().sum().values,
292
+ "Null %": (df.isnull().mean() * 100).round(1).values,
293
+ "Unique": df.nunique().values,
294
+ })
295
+ st.dataframe(col_info, use_container_width=True, hide_index=True)
296
+
297
+ # Auto charts
298
+ st.markdown("#### 🤖 Auto-Generated Insights")
299
+ suggested = auto_suggest_charts(profile)[:3]
300
+
301
+ chart_cols = st.columns(min(len(suggested), 2))
302
+ for i, ctype in enumerate(suggested[:2]):
303
+ with chart_cols[i]:
304
+ try:
305
+ fig = make_plotly_chart(ctype, df, profile)
306
+ st.plotly_chart(fig, use_container_width=True)
307
+ except Exception as e:
308
+ st.warning(f"Could not render {ctype}: {e}")
309
+
310
+ if len(suggested) > 2:
311
+ try:
312
+ fig = make_plotly_chart(suggested[2], df, profile)
313
+ st.plotly_chart(fig, use_container_width=True)
314
+ except Exception:
315
+ pass
316
+
317
+ # AI summary
318
+ st.markdown("#### 🧠 AI Dataset Summary")
319
+ if st.button("✨ Generate AI Summary"):
320
+ with st.spinner("🤖 Agent is generating full report..."):
321
+ set_dataframe(df, profile)
322
+ result = run_agent(
323
+ "Give me a full insight report on this dataset with key patterns, anomalies, and actionable recommendations.",
324
+ st.session_state.agent_executor, []
325
+ )
326
+ st.markdown(f'<div class="agent-bubble">{result["output"]}</div>', unsafe_allow_html=True)
327
+ if result["steps"]:
328
+ with st.expander(f"🔍 Agent used {len(result['steps'])} tool(s)"):
329
+ for i, (action, res) in enumerate(result["steps"]):
330
+ st.markdown(f"**Step {i+1}: `{action.tool}`**")
331
+ st.code(str(res)[:300] + "...", language="text")
332
+
333
+
334
+ # ════════════════════════════════════════════════════════════════
335
+ # TAB 2 — Chat
336
+ # ════════════════════════════════════════════════════════════════
337
+ with tab2:
338
+ st.markdown("#### 💬 Ask Anything About Your Data")
339
+ st.markdown("*The autonomous agent plans, uses tools, and reasons step-by-step to answer your question.*")
340
+
341
+ # Suggested questions
342
+ st.markdown("**Quick questions to try:**")
343
+ suggestions = [
344
+ "Give me a full insight report on this data",
345
+ "Are there any outliers or anomalies?",
346
+ "What correlations exist between numeric columns?",
347
+ ]
348
+ q_cols = st.columns(3)
349
+ for i, s in enumerate(suggestions):
350
+ with q_cols[i]:
351
+ if st.button(s, key=f"sug_{i}"):
352
+ st.session_state["prefill_q"] = s
353
+
354
+ # Chat history
355
+ for turn in st.session_state.chat_history:
356
+ st.markdown(f'<div class="user-bubble">👤 {turn["user"]}</div>', unsafe_allow_html=True)
357
+ # Show agent reasoning steps
358
+ if turn.get("steps"):
359
+ with st.expander(f"🔍 Agent used {len(turn['steps'])} tool(s) — click to see reasoning"):
360
+ for i, (action, result) in enumerate(turn["steps"]):
361
+ st.markdown(f"**Step {i+1}: `{action.tool}`**")
362
+ st.caption(f"Input: {action.tool_input}")
363
+ st.code(str(result)[:500] + ("..." if len(str(result)) > 500 else ""), language="text")
364
+ st.markdown(f'<div class="agent-bubble">🧠 {turn["agent"]}</div>', unsafe_allow_html=True)
365
+
366
+ # Input
367
+ prefill = st.session_state.pop("prefill_q", "")
368
+ question = st.text_input(
369
+ "Ask a question...",
370
+ value=prefill,
371
+ placeholder="e.g. Which category has the highest profit? Find outliers in sales.",
372
+ label_visibility="collapsed",
373
+ )
374
+
375
+ col_send, col_clear = st.columns([1, 5])
376
+ with col_send:
377
+ send = st.button("Send 🚀")
378
+ with col_clear:
379
+ if st.button("Clear Chat"):
380
+ st.session_state.chat_history = []
381
+ st.rerun()
382
+
383
+ if send and question.strip():
384
+ # Build LangChain chat history from session
385
+ from langchain_core.messages import HumanMessage as HM, AIMessage
386
+ lc_history = []
387
+ for turn in st.session_state.chat_history:
388
+ lc_history.append(HM(content=turn["user"]))
389
+ lc_history.append(AIMessage(content=turn["agent"]))
390
+
391
+ with st.spinner("🤖 Agent is planning and executing tools..."):
392
+ set_dataframe(df, profile)
393
+ result = run_agent(question, st.session_state.agent_executor, lc_history)
394
+ answer = result["output"]
395
+ steps = result["steps"]
396
+
397
+ # Get chart recommendation
398
+ try:
399
+ chart_json = json.loads(recommend_chart.invoke(question))
400
+ except Exception:
401
+ chart_json = None
402
+
403
+ st.session_state.chat_history.append({
404
+ "user": question,
405
+ "agent": answer,
406
+ "steps": steps,
407
+ })
408
+
409
+ st.markdown(f'<div class="user-bubble">👤 {question}</div>', unsafe_allow_html=True)
410
+
411
+ # Show reasoning steps
412
+ if steps:
413
+ with st.expander(f"🔍 Agent used {len(steps)} tool(s) — click to see reasoning"):
414
+ for i, (action, res) in enumerate(steps):
415
+ st.markdown(f"**Step {i+1}: `{action.tool}`**")
416
+ st.caption(f"Input: {action.tool_input}")
417
+ st.code(str(res)[:500] + ("..." if len(str(res)) > 500 else ""), language="text")
418
+
419
+ st.markdown(f'<div class="agent-bubble">🧠 {answer}</div>', unsafe_allow_html=True)
420
+
421
+ # Auto chart
422
+ if chart_json:
423
+ try:
424
+ fig = make_plotly_chart(
425
+ chart_json["chart_type"], df, profile,
426
+ x_col=chart_json.get("x_col"),
427
+ y_col=chart_json.get("y_col"),
428
+ )
429
+ st.plotly_chart(fig, use_container_width=True)
430
+ except Exception:
431
+ pass
432
+
433
+
434
+ # ════════════════════════════════════════════════════════════════
435
+ # TAB 3 — Charts
436
+ # ════════════════════════════════════════════════════════════════
437
+ with tab3:
438
+ st.markdown("#### 🎨 Custom Chart Builder")
439
+
440
+ chart_options = {
441
+ "Correlation Heatmap": "correlation_heatmap",
442
+ "Distribution Plot": "distribution_plots",
443
+ "Box Plots": "box_plots",
444
+ "Bar Chart": "bar_chart",
445
+ "Pie Chart": "pie_chart",
446
+ "Scatter Plot": "scatter",
447
+ "Line Chart": "line",
448
+ "Scatter Matrix": "scatter_matrix",
449
+ }
450
+ if profile["datetime_columns"]:
451
+ chart_options["Time Series"] = "time_series"
452
+
453
+ c1, c2, c3 = st.columns(3)
454
+ with c1:
455
+ chart_label = st.selectbox("Chart Type", list(chart_options.keys()))
456
+ with c2:
457
+ all_cols = ["(auto)"] + df.columns.tolist()
458
+ x_col = st.selectbox("X Column", all_cols)
459
+ with c3:
460
+ y_col = st.selectbox("Y Column", all_cols)
461
+
462
+ x_val = None if x_col == "(auto)" else x_col
463
+ y_val = None if y_col == "(auto)" else y_col
464
+
465
+ if st.button("🎨 Generate Chart"):
466
+ with st.spinner("Rendering..."):
467
+ try:
468
+ fig = make_plotly_chart(
469
+ chart_options[chart_label], df, profile,
470
+ x_col=x_val, y_col=y_val
471
+ )
472
+ st.plotly_chart(fig, use_container_width=True)
473
+ except Exception as e:
474
+ st.error(f"Chart error: {e}")
475
+
476
+ st.markdown("---")
477
+ st.markdown("#### 📊 All Auto-Suggested Charts")
478
+ suggested_all = auto_suggest_charts(profile)
479
+ for i in range(0, len(suggested_all), 2):
480
+ cols = st.columns(2)
481
+ for j, ctype in enumerate(suggested_all[i:i+2]):
482
+ with cols[j]:
483
+ try:
484
+ fig = make_plotly_chart(ctype, df, profile)
485
+ st.plotly_chart(fig, use_container_width=True)
486
+ except Exception as e:
487
+ st.warning(f"Could not render {ctype}")
488
+
489
+
490
+ # ════════════════════════════════════════════════════════════════
491
+ # TAB 4 — Raw Data
492
+ # ════════════════════════════════════════════════════════════════
493
+ with tab4:
494
+ st.markdown("#### 🔍 Raw Data Explorer")
495
+
496
+ # Search/filter
497
+ search = st.text_input("🔎 Filter rows containing...", placeholder="Type to filter...")
498
+ if search:
499
+ mask = df.astype(str).apply(lambda row: row.str.contains(search, case=False, na=False)).any(axis=1)
500
+ display_df = df[mask]
501
+ st.info(f"Showing {len(display_df):,} of {len(df):,} rows matching '{search}'")
502
+ else:
503
+ display_df = df
504
+
505
+ st.dataframe(display_df, use_container_width=True, height=500)
506
+
507
+ # Download
508
+ csv_buf = io.StringIO()
509
+ df.to_csv(csv_buf, index=False)
510
+ st.download_button(
511
+ "⬇️ Download as CSV",
512
+ data=csv_buf.getvalue(),
513
+ file_name="analyzed_data.csv",
514
+ mime="text/csv"
515
+ )
core_agent.py ADDED
@@ -0,0 +1,408 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core_agent.py — TRUE Agentic AI
3
+ LangChain Agent + Tools + Memory + Gemini
4
+ """
5
+
6
+ import os
7
+ import json
8
+ import warnings
9
+ import pandas as pd
10
+ import plotly.express as px
11
+ import plotly.graph_objects as go
12
+ from dotenv import load_dotenv
13
+
14
+ from langchain_google_genai import ChatGoogleGenerativeAI
15
+ from langchain_core.messages import HumanMessage, SystemMessage
16
+ from langchain_core.tools import tool
17
+ from langchain.agents import AgentExecutor, create_tool_calling_agent
18
+ from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
19
+ from langchain_community.chat_message_histories import ChatMessageHistory
20
+
21
+ warnings.filterwarnings("ignore")
22
+ load_dotenv()
23
+
24
# Chart palette / theme constants shared by the plotting helpers.
PALETTE = ["#6C63FF", "#FF6584", "#43E97B", "#F7971E", "#4FC3F7", "#CE93D8"]
DARK_BG = "#0F0F1A"
CARD_BG = "#1A1A2E"

# Module-level "current dataset" read by the agent tools below.  The app
# calls set_dataframe() before each agent run so tools see the live upload.
# BUGFIX: annotations were `pd.DataFrame` / `dict` although the values start
# as None — corrected to optional (string form, so nothing is evaluated).
_df: "pd.DataFrame | None" = None
_profile: "dict | None" = None

def set_dataframe(df, profile):
    """Register *df* and its precomputed *profile* as the active dataset.

    Args:
        df: the loaded DataFrame (or None to clear).
        profile: the dict produced by profile_dataframe(df).
    """
    global _df, _profile
    _df = df
    _profile = profile
35
+
36
def get_llm(api_key: str):
    """Build the Gemini chat model used by the agent.

    temperature=0.3 keeps analyses mostly deterministic; per the flag name,
    convert_system_message_to_human folds the system message into a human
    turn for Gemini API versions without a native system role.
    """
    model_config = dict(
        model="gemini-1.5-flash",
        google_api_key=api_key,
        temperature=0.3,
        convert_system_message_to_human=True,
    )
    return ChatGoogleGenerativeAI(**model_config)
43
+
44
def load_file(file):
    """Parse an uploaded file-like object into (DataFrame, type-label).

    Dispatches on the lowercased filename suffix: .csv, .xlsx/.xls, .json.

    Raises:
        ValueError: for any other extension.
    """
    fname = file.name.lower()
    if fname.endswith(".csv"):
        return pd.read_csv(file), "CSV"
    if fname.endswith((".xlsx", ".xls")):
        return pd.read_excel(file), "Excel"
    if fname.endswith(".json"):
        content = json.load(file)
        if isinstance(content, list):
            # List of records -> one row per element.
            return pd.DataFrame(content), "JSON"
        # Dict: column-oriented when any value is a list, else one record.
        if any(isinstance(v, list) for v in content.values()):
            return pd.DataFrame(content), "JSON"
        return pd.DataFrame([content]), "JSON"
    raise ValueError(f"Unsupported file type: {fname}")
59
+
60
def profile_dataframe(df):
    """Compute a lightweight statistical profile of *df* as a plain dict.

    Always includes shape/dtype/null/duplicate info; "numeric_stats" and
    "top_categories" are present only when such columns exist.
    """
    numeric = df.select_dtypes(include="number").columns.tolist()
    categorical = df.select_dtypes(include=["object", "category"]).columns.tolist()
    datetimes = df.select_dtypes(include=["datetime"]).columns.tolist()

    profile = {
        "shape": df.shape,
        "columns": df.columns.tolist(),
        "dtypes": df.dtypes.astype(str).to_dict(),
        "numeric_columns": numeric,
        "categorical_columns": categorical,
        "datetime_columns": datetimes,
        "null_counts": df.isnull().sum().to_dict(),
        "null_pct": (df.isnull().mean() * 100).round(2).to_dict(),
        "duplicates": int(df.duplicated().sum()),
    }
    if numeric:
        profile["numeric_stats"] = df[numeric].describe().round(3).to_dict()
    if categorical:
        profile["top_categories"] = {
            c: df[c].value_counts().head(5).to_dict() for c in categorical
        }
    return profile
80
+
81
def profile_to_text(profile, df):
    """Render *profile* (plus a 5-row sample of *df*) as plain text for the LLM."""
    n_rows, n_cols = profile["shape"]

    def _names(cols):
        # Comma-joined list, or the literal word "None" when empty.
        return ", ".join(cols) or "None"

    lines = [
        f"Dataset: {n_rows} rows x {n_cols} columns",
        f"Numeric columns : {_names(profile['numeric_columns'])}",
        f"Categorical cols : {_names(profile['categorical_columns'])}",
        f"Datetime cols : {_names(profile['datetime_columns'])}",
        f"Missing values : {sum(profile['null_counts'].values())} total",
        f"Duplicate rows : {profile['duplicates']}",
        "",
        "--- Sample Data (first 5 rows) ---",
        df.head(5).to_string(index=False),
    ]
    stats = profile.get("numeric_stats")
    if stats:
        lines += ["", "--- Numeric Stats ---"]
        for col, s in stats.items():
            lines.append(
                f" {col}: mean={s.get('mean','?')}, std={s.get('std','?')}, "
                f"min={s.get('min','?')}, max={s.get('max','?')}"
            )
    return "\n".join(lines)
98
+
99
+ # ══════════════════════════════════════════════
100
+ # AGENT TOOLS
101
+ # ══════════════════════════════════════════════
102
+
103
@tool
def profile_data(query: str) -> str:
    """Get full statistical profile of the dataset. Use this FIRST before any analysis."""
    # `query` is the free-text argument every tool receives; this one ignores
    # it and simply renders the module-level _df/_profile set by set_dataframe().
    if _df is None:
        return "No dataset loaded. Please upload a file first."
    return profile_to_text(_profile, _df)
109
+
110
@tool
def analyze_column(column_name: str) -> str:
    """Deeply analyze a specific column. Provide the exact column name."""
    # Docstring above doubles as the LLM-visible tool description.
    if _df is None:
        return "No dataset loaded."
    if column_name not in _df.columns:
        return f"Column '{column_name}' not found. Available: {_df.columns.tolist()}"

    col = _df[column_name]
    lines = [
        f"Analysis of '{column_name}'",
        f"Type: {col.dtype}",
        f"Non-null: {col.count()} / {len(col)}",
        f"Nulls: {col.isnull().sum()} ({col.isnull().mean()*100:.1f}%)",
    ]
    if pd.api.types.is_numeric_dtype(col):
        # Numeric branch: central tendency, spread, and IQR-based outliers.
        q1, q3 = col.quantile(0.25), col.quantile(0.75)
        spread = q3 - q1
        n_outliers = int(((col < q1 - 1.5 * spread) | (col > q3 + 1.5 * spread)).sum())
        lines += [
            f"Mean: {col.mean():.3f}",
            f"Median: {col.median():.3f}",
            f"Std: {col.std():.3f}",
            f"Min: {col.min()}",
            f"Max: {col.max()}",
            f"Skewness: {col.skew():.3f}",
            f"Outliers: {n_outliers}",
        ]
    else:
        # Categorical branch: cardinality and frequency summary.
        modes = col.mode()
        lines += [
            f"Unique values: {col.nunique()}",
            f"Top 5: {col.value_counts().head(5).to_dict()}",
            f"Most common: {modes[0] if not modes.empty else 'N/A'}",
        ]
    return "\n".join(lines)
132
+
133
@tool
def find_correlations(query: str) -> str:
    """Find correlations between numeric columns. Highlights strong relationships."""
    if _df is None:
        return "No dataset loaded."
    num_cols = _profile["numeric_columns"]
    if len(num_cols) < 2:
        return "Need at least 2 numeric columns."

    corr = _df[num_cols].corr().round(3)

    # Walk the upper triangle and collect |r| >= 0.5 pairs.
    notable = []
    n = len(num_cols)
    for i in range(n):
        for j in range(i + 1, n):
            r = corr.iloc[i, j]
            if abs(r) < 0.5:
                continue
            strength = "strong" if abs(r) >= 0.8 else "moderate"
            direction = "positive" if r > 0 else "negative"
            notable.append(f" {num_cols[i]} <-> {num_cols[j]}: {r} ({strength} {direction})")

    out = ["Correlation Matrix:", corr.to_string()]
    if notable:
        out += ["", "Notable correlations:"] + notable
    else:
        out.append("No strong correlations found (|r| >= 0.5)")
    return "\n".join(out)
156
+
157
@tool
def detect_anomalies(query: str) -> str:
    """Detect outliers and anomalies across all numeric columns using IQR method."""
    if _df is None:
        return "No dataset loaded."
    num_cols = _profile["numeric_columns"]
    if not num_cols:
        return "No numeric columns found."

    report = ["Anomaly Detection Report:"]
    grand_total = 0
    for col in num_cols:
        # Fences are computed on the non-null values; the mask then runs over
        # the full column (NaN comparisons are False, so NaNs never match).
        clean = _df[col].dropna()
        q1, q3 = clean.quantile(0.25), clean.quantile(0.75)
        spread = q3 - q1
        mask = (_df[col] < q1 - 1.5 * spread) | (_df[col] > q3 + 1.5 * spread)
        flagged = _df[mask][col]
        if len(flagged) > 0:
            grand_total += len(flagged)
            report.append(f" {col}: {len(flagged)} outliers | Examples: {flagged.head(3).tolist()}")

    report.append(f"\nTotal outliers: {grand_total}")
    if grand_total == 0:
        report.append("No significant outliers detected.")
    return "\n".join(report)
179
+
180
@tool
def run_aggregation(query: str) -> str:
    """
    Compute group-by aggregations.
    Format input as: 'group_col|agg_col|function'
    Example: 'category|sales|sum'
    Supported: sum, mean, count, max, min, median
    """
    if _df is None:
        return "No dataset loaded."
    # The docstring (= tool description) promises exactly these functions.
    allowed = {"sum", "mean", "count", "max", "min", "median"}
    try:
        parts = [p.strip() for p in query.split("|")]
        if len(parts) == 3:
            group_col, agg_col, func = parts
        elif len(parts) == 2:
            # Function omitted -> default to mean.
            group_col, agg_col, func = parts[0], parts[1], "mean"
        else:
            # No usable spec: fall back to first categorical x first numeric.
            cat_cols = _profile["categorical_columns"]
            num_cols = _profile["numeric_columns"]
            if not cat_cols or not num_cols:
                return "Could not determine columns."
            group_col, agg_col, func = cat_cols[0], num_cols[0], "sum"
        if group_col not in _df.columns:
            return f"Column '{group_col}' not found. Available: {_df.columns.tolist()}"
        if agg_col not in _df.columns:
            return f"Column '{agg_col}' not found. Available: {_df.columns.tolist()}"
        fn = func.lower()
        # BUGFIX: previously any string reached GroupBy.agg, so an
        # unsupported function name produced an opaque pandas error instead
        # of the documented contract.
        if fn not in allowed:
            return f"Unsupported function '{func}'. Supported: {sorted(allowed)}"
        result = _df.groupby(group_col)[agg_col].agg(fn).reset_index().sort_values(agg_col, ascending=False)
        result.columns = [group_col, f"{fn}_{agg_col}"]
        return f"Aggregation: {fn.upper()} of '{agg_col}' by '{group_col}'\n{result.to_string(index=False)}"
    except Exception as e:
        # Tools must return text, never raise, so the agent can self-correct.
        return f"Aggregation error: {str(e)}"
212
+
213
@tool
def generate_insight_report(query: str) -> str:
    """Generate a complete automated insight report with data quality score, patterns, and recommendations."""
    if _df is None:
        return "No dataset loaded."
    rows, cols = _profile["shape"]
    num_cols = _profile["numeric_columns"]
    cat_cols = _profile["categorical_columns"]
    nulls = sum(_profile["null_counts"].values())
    null_pct = (nulls / (rows * cols) * 100) if rows * cols > 0 else 0
    # Heuristic quality score: start at 100, deduct for missingness tiers and
    # for the presence of duplicate rows.
    quality = 100
    if null_pct > 20: quality -= 30
    elif null_pct > 10: quality -= 15
    elif null_pct > 5: quality -= 5
    if _profile["duplicates"] > 0: quality -= 10
    report = [
        "=" * 50, "AUTOMATED INSIGHT REPORT", "=" * 50, "",
        "1. DATASET OVERVIEW",
        f" Rows: {rows:,} | Columns: {cols}",
        f" Numeric: {len(num_cols)} | Categorical: {len(cat_cols)}",
        f" Data Quality Score: {quality}/100", "",
        "2. DATA QUALITY",
        f" Missing values: {nulls} ({null_pct:.1f}%)",
        f" Duplicate rows: {_profile['duplicates']}",
    ]
    if nulls > 0:
        worst = max(_profile["null_pct"].items(), key=lambda x: x[1])
        report.append(f" Worst column: '{worst[0]}' ({worst[1]}% missing)")
    report += ["", "3. KEY STATISTICS"]
    for col in num_cols[:5]:
        stats = _profile.get("numeric_stats", {}).get(col, {})
        report.append(f" {col}: mean={stats.get('mean','?')}, range=[{stats.get('min','?')}, {stats.get('max','?')}]")
    if cat_cols:
        report += ["", "4. CATEGORICAL SUMMARY"]
        for col in cat_cols[:3]:
            # BUGFIX: a non-empty but all-NaN column used to raise IndexError
            # (value_counts() is empty even though the column is not).
            vc = _df[col].value_counts()
            top = vc.index[0] if len(vc) else "N/A"
            report.append(f" {col}: {_df[col].nunique()} unique | most common = '{top}'")
    report += [
        "", "5. RECOMMENDATIONS",
        f" - {'Fix missing values' if null_pct > 5 else 'Data completeness looks good'}",
        f" - {'Remove duplicate rows' if _profile['duplicates'] > 0 else 'No duplicates found'}",
        f" - {'Run correlation analysis' if len(num_cols) >= 2 else 'Need more numeric columns'}",
        f" - {'Encode categorical columns for ML' if cat_cols else 'Add categorical features'}",
        "", "=" * 50,
    ]
    return "\n".join(report)
259
+
260
@tool
def recommend_chart(question: str) -> str:
    """Recommend best chart type for a question. Returns JSON with chart_type, x_col, y_col."""

    def _pick(chart_type, x_col=None, y_col=None):
        # Key order matches the documented contract: chart_type, x_col, y_col.
        return json.dumps({"chart_type": chart_type, "x_col": x_col, "y_col": y_col})

    if _profile is None:
        return _pick("bar_chart")

    num_cols = _profile["numeric_columns"]
    cat_cols = _profile["categorical_columns"]
    dt_cols = _profile["datetime_columns"]
    q = question.lower()

    def _mentions(*words):
        return any(w in q for w in words)

    # Keyword heuristics, most specific first; fall through to sensible
    # defaults based on what column kinds exist.
    if _mentions("trend", "over time", "time", "date") and dt_cols and num_cols:
        return _pick("time_series", dt_cols[0], num_cols[0])
    if _mentions("correlat", "relationship", "vs", "versus") and len(num_cols) >= 2:
        return _pick("correlation_heatmap")
    if _mentions("distribut", "spread", "histogram") and num_cols:
        return _pick("distribution_plots", None, num_cols[0])
    if _mentions("outlier", "box", "range") and num_cols:
        return _pick("box_plots")
    if _mentions("proportion", "share", "percent", "pie") and cat_cols:
        return _pick("pie_chart", cat_cols[0])
    if cat_cols and num_cols:
        return _pick("bar_chart", cat_cols[0], num_cols[0])
    if len(num_cols) >= 2:
        return _pick("scatter", num_cols[0], num_cols[1])
    return _pick("bar_chart")
284
+
285
+ # ══════════════════════════════════════════════
286
+ # AGENT BUILDER
287
+ # ══════════════════════════════════════════════
288
+
289
# Tool registry handed to the agent executor; every entry is a @tool-decorated
# function defined earlier in this module.
TOOLS = [profile_data, analyze_column, find_correlations,
         detect_anomalies, run_aggregation, generate_insight_report, recommend_chart]

# System prompt defining the agent's persona and tool-usage protocol.
# NOTE: the bullet list of tools below must be kept in sync with TOOLS above.
SYSTEM_PROMPT = """You are DataMind, an expert autonomous data analyst AI agent.

You have access to powerful tools to analyze any dataset. When a user asks a question:
1. THINK about what tools you need
2. PLAN your steps (use multiple tools in sequence when needed)
3. EXECUTE each tool
4. SYNTHESIZE the results into a clear, insightful answer
5. SELF-CORRECT if a tool returns an error

Your tools:
- profile_data: Get dataset overview (use this first)
- analyze_column: Deep dive into a specific column
- find_correlations: Find relationships between numeric columns
- detect_anomalies: Find outliers and data quality issues
- run_aggregation: Group-by calculations
- generate_insight_report: Full automated analysis report
- recommend_chart: Suggest best visualization

Always be precise, proactive, and thorough. Use multiple tools when needed.
Remember conversation history and refer to previous questions when relevant."""
312
+
313
def build_agent(llm) -> AgentExecutor:
    """Build a tool-calling agent executor around *llm*.

    Args:
        llm: A LangChain chat model that supports tool calling
            (e.g. ChatGoogleGenerativeAI / Gemini).

    Returns:
        An AgentExecutor wired with SYSTEM_PROMPT, chat-history memory slots
        and the module-level TOOLS registry.
    """
    prompt = ChatPromptTemplate.from_messages([
        ("system", SYSTEM_PROMPT),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ])
    agent = create_tool_calling_agent(llm, TOOLS, prompt)
    return AgentExecutor(
        agent=agent, tools=TOOLS, verbose=True,
        max_iterations=6,
        # FIX: "generate" is only supported by the legacy Agent subclasses;
        # runnable (tool-calling) agents raise ValueError at the iteration
        # limit with it. "force" is supported everywhere and simply returns
        # a stopped-due-to-limit message.
        early_stopping_method="force",
        handle_parsing_errors=True, return_intermediate_steps=True,
    )
326
+
327
def run_agent(question: str, agent_executor: AgentExecutor, chat_history: list) -> dict:
    """Invoke the agent on *question* and normalize the result.

    Never raises: any exception from the executor is folded into the returned
    dict so the UI can render it.

    Returns:
        dict with keys "output" (answer text), "steps" (intermediate tool
        calls) and "error" (None on success, message string on failure).
    """
    payload = {"input": question, "chat_history": chat_history}
    try:
        raw = agent_executor.invoke(payload)
    except Exception as exc:  # surface agent failures to the caller instead of crashing
        return {"output": f"Agent error: {exc}", "steps": [], "error": str(exc)}
    return {
        "output": raw.get("output", "No response."),
        "steps": raw.get("intermediate_steps", []),
        "error": None,
    }
333
+
334
# ── Chart Engine ──────────────────────────────
335
def auto_suggest_charts(profile):
    """Return chart-type names applicable to a dataset profile.

    Args:
        profile: Profile dict with "numeric_columns", "categorical_columns"
            and "datetime_columns" lists.

    Returns:
        List of chart-type strings in a fixed priority order.
    """
    num = profile["numeric_columns"]
    cat = profile["categorical_columns"]
    dt = profile["datetime_columns"]
    # (condition, charts) pairs evaluated in order; the order fixes the
    # priority in which suggestions are presented.
    rules = [
        (len(num) >= 2, ["correlation_heatmap", "scatter_matrix"]),
        (bool(num), ["distribution_plots", "box_plots"]),
        (bool(cat) and bool(num), ["bar_chart", "pie_chart"]),
        (bool(dt) and bool(num), ["time_series"]),
    ]
    return [chart for ok, charts in rules if ok for chart in charts]
346
+
347
def make_plotly_chart(chart_type, df, profile, x_col=None, y_col=None, color_col=None):
    """Build a dark-themed Plotly figure of *chart_type* from *df*.

    Falls back to a column-means bar chart (or an empty placeholder figure)
    when the requested chart type is not applicable to the available columns.

    Args:
        chart_type: One of the names produced by auto_suggest_charts /
            recommend_chart (e.g. "correlation_heatmap", "bar_chart").
        df: Source DataFrame.
        profile: Profile dict with "numeric_columns", "categorical_columns"
            and "datetime_columns" lists.
        x_col, y_col, color_col: Optional column overrides; when omitted,
            profile-derived defaults are used.

    Returns:
        A plotly Figure with the app's dark styling applied.

    NOTE(review): PALETTE, DARK_BG and CARD_BG are presumably module-level
    theme constants defined earlier in this file — confirm they exist before
    reusing this function elsewhere.
    """
    num_cols = profile["numeric_columns"]
    cat_cols = profile["categorical_columns"]
    template = "plotly_dark"
    if chart_type == "correlation_heatmap" and len(num_cols) >= 2:
        # Pairwise Pearson correlations, annotated in-cell, diverging scale centered at 0.
        fig = px.imshow(df[num_cols].corr().round(2), text_auto=True,
                        color_continuous_scale="RdBu_r", title="Correlation Heatmap",
                        template=template, color_continuous_midpoint=0)
    elif chart_type == "distribution_plots" and num_cols:
        # Histogram of one numeric column with a marginal box plot.
        col = y_col or num_cols[0]
        fig = px.histogram(df, x=col, nbins=30, marginal="box",
                           title=f"Distribution of {col}",
                           color_discrete_sequence=PALETTE, template=template)
    elif chart_type == "box_plots" and num_cols:
        # One box trace per numeric column, capped at the first 6 columns.
        fig = go.Figure()
        for i, col in enumerate(num_cols[:6]):
            fig.add_trace(go.Box(y=df[col], name=col, marker_color=PALETTE[i % len(PALETTE)]))
        fig.update_layout(title="Box Plots", template=template)
    elif chart_type == "bar_chart" and cat_cols and num_cols:
        # Mean of a numeric column per category, top 15 categories by value.
        xc, yc = x_col or cat_cols[0], y_col or num_cols[0]
        agg = df.groupby(xc)[yc].mean().reset_index().sort_values(yc, ascending=False).head(15)
        fig = px.bar(agg, x=xc, y=yc, color=yc, color_continuous_scale="Viridis",
                     title=f"Average {yc} by {xc}", template=template)
    elif chart_type == "pie_chart" and cat_cols:
        # Category share, limited to the 8 most frequent values.
        col = x_col or cat_cols[0]
        counts = df[col].value_counts().head(8)
        fig = px.pie(values=counts.values, names=counts.index,
                     title=f"Distribution of {col}",
                     color_discrete_sequence=PALETTE, template=template)
    elif chart_type == "scatter_matrix" and len(num_cols) >= 2:
        # Pairwise scatter over up to 4 numeric columns, colored by the first
        # categorical column when one exists; lower triangle only.
        fig = px.scatter_matrix(df, dimensions=num_cols[:4],
                                color=cat_cols[0] if cat_cols else None,
                                color_discrete_sequence=PALETTE, title="Scatter Matrix", template=template)
        fig.update_traces(diagonal_visible=False, showupperhalf=False)
    elif chart_type == "time_series" and profile["datetime_columns"] and num_cols:
        # Numeric column over the first datetime column, sorted chronologically.
        dt_col = profile["datetime_columns"][0]
        yc = y_col or num_cols[0]
        fig = px.line(df.sort_values(dt_col), x=dt_col, y=yc,
                      title=f"{yc} over Time", color_discrete_sequence=PALETTE, template=template)
    elif chart_type == "scatter" and len(num_cols) >= 2:
        # Two numeric columns with an OLS trendline (requires statsmodels).
        xc, yc = x_col or num_cols[0], y_col or num_cols[1]
        fig = px.scatter(df, x=xc, y=yc,
                         color=color_col or (cat_cols[0] if cat_cols else None),
                         color_discrete_sequence=PALETTE, title=f"{xc} vs {yc}",
                         trendline="ols", template=template)
    elif chart_type == "line" and num_cols:
        # Generic line chart; prefers a datetime x-axis when one is available.
        xc = x_col or (profile["datetime_columns"][0] if profile["datetime_columns"] else num_cols[0])
        yc = y_col or num_cols[0]
        fig = px.line(df, x=xc, y=yc, color_discrete_sequence=PALETTE,
                      title=f"{yc} trend", template=template)
    else:
        # Fallback: bar chart of column means (first 8 numeric columns), or an
        # empty placeholder when the dataset has no numeric columns at all.
        if num_cols:
            means = df[num_cols[:8]].mean()
            fig = px.bar(x=means.index, y=means.values, color=means.values,
                         color_continuous_scale="Viridis", title="Column Means", template=template)
        else:
            fig = go.Figure()
            fig.update_layout(template=template, title="Chart Unavailable")
    # Apply the shared app theme to every figure produced above.
    fig.update_layout(paper_bgcolor=DARK_BG, plot_bgcolor=CARD_BG,
                      font=dict(family="DM Sans, sans-serif", color="#E0E0FF"),
                      margin=dict(l=40, r=40, t=60, b=40))
    return fig
requirements.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ langchain==0.3.7
2
+ langchain-google-genai==2.0.5
3
+ langchain-experimental==0.3.3
4
+ langchain-community==0.3.7
5
+ google-generativeai==0.8.3
6
+ pandas==2.2.3
7
+ openpyxl==3.1.5
8
+ xlrd==2.0.1
9
+ matplotlib==3.9.2
10
+ seaborn==0.13.2
11
+ plotly==5.24.1
12
+ streamlit==1.40.1
13
+ python-dotenv==1.0.1
14
+ tabulate==0.9.0
sample_data.csv ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ order_id,date,product,category,region,sales,quantity,profit,customer_age,customer_gender
2
+ 1001,2024-01-05,Laptop Pro,Electronics,North,1200.00,1,240.00,34,Male
3
+ 1002,2024-01-07,Office Chair,Furniture,South,350.00,2,70.00,45,Female
4
+ 1003,2024-01-08,Wireless Mouse,Electronics,East,45.00,5,9.00,28,Male
5
+ 1004,2024-01-10,Standing Desk,Furniture,West,650.00,1,130.00,52,Female
6
+ 1005,2024-01-12,Mechanical Keyboard,Electronics,North,120.00,3,36.00,30,Male
7
+ 1006,2024-01-15,Monitor 4K,Electronics,South,400.00,2,80.00,41,Female
8
+ 1007,2024-01-18,Notebook Set,Stationery,East,25.00,10,7.50,23,Male
9
+ 1008,2024-01-20,Ergonomic Chair,Furniture,West,520.00,1,104.00,38,Female
10
+ 1009,2024-01-22,USB Hub,Electronics,North,35.00,8,10.50,26,Male
11
+ 1010,2024-01-25,Desk Lamp,Furniture,South,60.00,4,18.00,49,Female
12
+ 1011,2024-02-01,Laptop Pro,Electronics,East,1200.00,2,480.00,36,Male
13
+ 1012,2024-02-03,Wireless Headphones,Electronics,West,200.00,3,60.00,31,Female
14
+ 1013,2024-02-05,Pen Set,Stationery,North,15.00,20,6.00,22,Male
15
+ 1014,2024-02-08,Gaming Chair,Furniture,South,450.00,1,90.00,27,Female
16
+ 1015,2024-02-10,Tablet,Electronics,East,600.00,2,120.00,43,Male
17
+ 1016,2024-02-14,Bookshelf,Furniture,West,180.00,1,36.00,55,Female
18
+ 1017,2024-02-16,Webcam HD,Electronics,North,80.00,6,24.00,29,Male
19
+ 1018,2024-02-18,Sticky Notes,Stationery,South,8.00,50,4.00,24,Female
20
+ 1019,2024-02-20,Monitor Stand,Furniture,East,95.00,3,28.50,37,Male
21
+ 1020,2024-02-22,Smartphone,Electronics,West,900.00,2,180.00,33,Female
22
+ 1021,2024-03-01,Laptop Pro,Electronics,North,1200.00,3,720.00,40,Male
23
+ 1022,2024-03-04,Office Chair,Furniture,South,350.00,4,140.00,48,Female
24
+ 1023,2024-03-06,Drawing Tablet,Electronics,East,300.00,1,60.00,25,Male
25
+ 1024,2024-03-09,Filing Cabinet,Furniture,West,220.00,2,44.00,53,Female
26
+ 1025,2024-03-12,Wireless Mouse,Electronics,North,45.00,10,22.50,32,Male
27
+ 1026,2024-03-15,External SSD,Electronics,South,150.00,4,45.00,44,Female
28
+ 1027,2024-03-18,Highlighters,Stationery,East,12.00,30,5.40,21,Male
29
+ 1028,2024-03-20,Desk Organizer,Furniture,West,40.00,7,14.00,35,Female
30
+ 1029,2024-03-22,Smart Speaker,Electronics,North,120.00,5,36.00,39,Male
31
+ 1030,2024-03-25,Printer,Electronics,South,280.00,2,56.00,46,Female