sanjaystarc committed on
Commit
987cfe3
·
verified ·
1 Parent(s): cec50fe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -97
app.py CHANGED
@@ -6,112 +6,106 @@ import seaborn as sns
6
  import time
7
  import random
8
 
9
- # Updated LangChain Imports
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
  from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
12
- from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
13
 
14
- # --- 1. PAGE SETUP ---
 
15
  st.set_page_config(
16
- page_title="Agentic Data Analyst",
17
- page_icon="📊",
18
  layout="wide"
19
  )
20
 
21
- # Fetch API Key from environment
22
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
23
 
 
24
  def main():
25
- st.title("🤖 Agentic Data Analyst")
26
  st.markdown("""
27
- This agent follows an **agentic workflow**: it reasons, writes code,
28
- observes results, and self-corrects.
29
  """)
30
 
31
  if not GEMINI_API_KEY:
32
- st.error("❌ Missing `GEMINI_API_KEY`. Please set it as an environment variable.")
33
  st.stop()
34
 
35
- # --- 2. DATA LOADING ---
36
  uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
37
 
38
  if uploaded_file:
39
  @st.cache_data
40
  def load_data(file):
41
  return pd.read_csv(file)
42
-
43
  df = load_data(uploaded_file)
44
-
45
  with st.expander("📄 Data Overview"):
46
  st.dataframe(df.head())
47
- st.info(f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns.")
48
 
49
- # --- 3. AGENT CONFIGURATION ---
50
- query = st.text_area("What analysis would you like to perform?",
51
- placeholder="e.g., 'Plot the distribution of prices.'")
 
 
52
 
53
  if st.button("Run Agent") and query:
54
- # Initialize LLM
 
55
  llm = ChatGoogleGenerativeAI(
56
- model="gemini-2.5-flash-preview-09-2025",
57
  google_api_key=GEMINI_API_KEY,
58
  temperature=0,
59
- max_retries=6
 
60
  )
61
 
62
- # Add dataframe context manually in prefix
63
- df_context = f"The dataframe 'df' has the following columns: {', '.join(df.columns)}"
64
-
 
65
  custom_prefix = f"""
66
- You are a professional Python data analyst working inside a Streamlit+Pandas agent.
67
- The dataframe is named `df`.
68
- {df_context}
69
-
70
- 🚨 STRICT RULES YOU MUST FOLLOW 🚨
71
-
72
- # 1 — CODE QUALITY RULES
73
- - Code MUST be short, clean, and correct.
74
- - NEVER repeat imports. Use ONLY:
75
- import matplotlib.pyplot as plt
76
- import seaborn as sns
77
- - NEVER import streamlit inside the Action Input. (The environment already has `st`.)
78
- - NEVER use .copy() unless absolutely necessary.
79
- - NEVER print() results — always return plots or data.
80
- - NEVER write long multi-step code unless required.
81
-
82
- # 2 — PLOTTING RULES
83
- - Before plotting any filtered dataframe, always check: if filtered.empty:
84
- - Always start plots with:
85
- plt.figure()
86
- - Always end plots with:
87
- st.pyplot(plt.gcf())
88
-
89
- # 3 — OUTPUT FORMAT RULES
90
- - If you need to run code → respond ONLY with:
91
- Action: python_repl_ast
92
- Action Input:
93
- <python code only>
94
- - If no code is needed → respond ONLY with:
95
- Final Answer: <answer>
96
-
97
- # 4 — WHAT YOU ARE OPTIMIZED FOR
98
- - Reliable code
99
- - Short code
100
- - Zero unnecessary steps
101
- - Zero repeated imports
102
- - Zero noisy output
103
- - Zero markdown
104
-
105
- Follow these rules EXACTLY.
106
-
107
- """
108
-
109
- # IMPORTANT: No suffix → avoids ValueError
110
  agent = create_pandas_dataframe_agent(
111
  llm,
112
  df,
113
  verbose=True,
114
- agent_type="zero-shot-react-description", # required agent type
115
  allow_dangerous_code=True,
116
  prefix=custom_prefix,
117
  include_df_in_prompt=False,
@@ -119,38 +113,26 @@ def main():
119
  agent_executor_kwargs={"handle_parsing_errors": True}
120
  )
121
 
122
- # --- 4. EXECUTION ---
123
  st.subheader("🧠 Reasoning & Execution")
124
- thought_container = st.container()
125
- st_callback = StreamlitCallbackHandler(thought_container)
126
-
127
- with st.spinner("Agent is analyzing..."):
128
- max_loop_retries = 3
129
- for attempt in range(max_loop_retries):
130
- try:
131
- response = agent.run(query, callbacks=[st_callback])
132
- st.markdown("---")
133
- st.subheader("✅ Final Analysis Result")
134
- st.success(response)
135
- break
136
-
137
- except Exception as e:
138
- if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
139
- if attempt < max_loop_retries - 1:
140
- wait_time = (2 ** attempt) + random.random()
141
- st.warning(f"Quota reached. Retrying in {wait_time:.2f} seconds...")
142
- time.sleep(wait_time)
143
- continue
144
- else:
145
- st.error("Rate limit exceeded consistently. Try again later.")
146
- else:
147
- st.error("Agent encountered a parsing or execution error.")
148
- with st.expander("Show Technical Error"):
149
- st.code(str(e))
150
- break
151
 
152
  else:
153
- st.info("👆 Upload a CSV to begin.")
 
154
 
155
  if __name__ == "__main__":
156
  main()
 
6
  import time
7
  import random
8
 
9
+ # LangChain + Gemini
10
  from langchain_google_genai import ChatGoogleGenerativeAI
11
  from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
 
12
 
13
+
14
+ # --- PAGE SETUP ---
15
  st.set_page_config(
16
+ page_title="Agentic Data Analyst",
17
+ page_icon="📊",
18
  layout="wide"
19
  )
20
 
 
21
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
22
 
23
+
24
  def main():
25
+ st.title("🤖 Agentic Data Analyst (Gemini 2.5 Flash)")
26
  st.markdown("""
27
+ This agent intelligently analyzes your dataset using an agentic workflow.
28
+ It writes Python code, executes it, and returns insights.
29
  """)
30
 
31
  if not GEMINI_API_KEY:
32
+ st.error("❌ Missing `GEMINI_API_KEY`. Set it as an environment variable.")
33
  st.stop()
34
 
35
+ # --- CSV UPLOAD ---
36
  uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
37
 
38
  if uploaded_file:
39
  @st.cache_data
40
  def load_data(file):
41
  return pd.read_csv(file)
42
+
43
  df = load_data(uploaded_file)
44
+
45
  with st.expander("📄 Data Overview"):
46
  st.dataframe(df.head())
47
+ st.info(f"Rows: {df.shape[0]} | Columns: {df.shape[1]}")
48
 
49
+ # --- USER QUERY ---
50
+ query = st.text_area(
51
+ "What analysis would you like to perform?",
52
+ placeholder="e.g., Plot Price distribution"
53
+ )
54
 
55
  if st.button("Run Agent") and query:
56
+
57
+ # --- LLM INIT (Gemini 2.5 Flash / Non-streaming fix) ---
58
  llm = ChatGoogleGenerativeAI(
59
+ model="gemini-2.5-flash",
60
  google_api_key=GEMINI_API_KEY,
61
  temperature=0,
62
+ max_retries=5,
63
+ streaming=False # IMPORTANT: avoids chunking error
64
  )
65
 
66
+ # Provide dataframe context
67
+ df_context = f"The dataframe 'df' has columns: {', '.join(df.columns)}"
68
+
69
+ # --- PREFIX (clean, reliable code rules) ---
70
  custom_prefix = f"""
71
+ You are a professional Python data analyst running inside a Streamlit + Pandas agent.
72
+ The dataframe is named `df`.
73
+ {df_context}
74
+
75
+ 🚨 STRICT RULES 🚨
76
+
77
+ # 1 — CODE QUALITY
78
+ - Code must be short, clean, correct.
79
+ - Never repeat imports.
80
+ - Only allowed imports inside Action Input:
81
+ import matplotlib.pyplot as plt
82
+ import seaborn as sns
83
+ - Never import streamlit.
84
+ - Never print().
85
+ - Never wrap outputs in markdown.
86
+
87
+ # 2 — PLOTTING RULES
88
+ - Before plotting filtered data, check if filtered.empty.
89
+ - Always start plots with: plt.figure()
90
+ - Always end plots with: st.pyplot(plt.gcf())
91
+
92
+ # 3 — OUTPUT FORMAT
93
+ - If code is required → return ONLY:
94
+ Action: python_repl_ast
95
+ Action Input:
96
+ <python code only>
97
+ - If no code is needed → return ONLY:
98
+ Final Answer: <answer>
99
+
100
+ Follow these rules EXACTLY.
101
+ """
102
+
103
+ # --- CREATE AGENT ---
 
 
 
 
 
 
 
 
 
 
 
104
  agent = create_pandas_dataframe_agent(
105
  llm,
106
  df,
107
  verbose=True,
108
+ agent_type="zero-shot-react-description",
109
  allow_dangerous_code=True,
110
  prefix=custom_prefix,
111
  include_df_in_prompt=False,
 
113
  agent_executor_kwargs={"handle_parsing_errors": True}
114
  )
115
 
116
+ # --- EXECUTION ---
117
  st.subheader("🧠 Reasoning & Execution")
118
+
119
+ with st.spinner("Agent analyzing..."):
120
+ try:
121
+ # no callback (Gemini 2.5 streaming not supported)
122
+ response = agent.run(query)
123
+
124
+ st.markdown("---")
125
+ st.subheader("✅ Final Analysis Result")
126
+ st.success(response)
127
+
128
+ except Exception as e:
129
+ st.error("Agent encountered an error.")
130
+ with st.expander("Show Technical Error"):
131
+ st.code(str(e))
 
 
 
 
 
 
 
 
 
 
 
 
 
132
 
133
  else:
134
+ st.info("👆 Upload a CSV file to begin.")
135
+
136
 
137
  if __name__ == "__main__":
138
  main()