sanjaystarc committed on
Commit
ee89673
·
verified ·
1 Parent(s): 46d74df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -75
app.py CHANGED
@@ -4,123 +4,91 @@ import pandas as pd
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
 
7
- # LangChain Imports
8
  from langchain_google_genai import ChatGoogleGenerativeAI
9
- from langchain_experimental.agents import create_pandas_dataframe_agent
10
- from langchain.agents import AgentType
11
  from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
12
 
13
- # --- CONFIG ---
14
  st.set_page_config(
15
  page_title="Agentic Data Analyst",
16
  page_icon="📊",
17
  layout="wide"
18
  )
19
 
20
- # Use the API key from environment
21
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
22
 
23
  def main():
24
- st.title("🤖 Agentic Data Analyst (LangChain + Gemini)")
25
  st.markdown("""
26
- This agent uses a **ReAct (Reason + Act)** loop. It doesn't just guess code;
27
- it executes Python, checks the results, and self-corrects if it encounters errors.
28
  """)
29
 
 
30
  if not GEMINI_API_KEY:
31
- st.error("❌ GEMINI_API_KEY not found in environment variables.")
32
  st.stop()
33
 
34
- # 1. Initialize the Brain (LLM)
35
- # We use temperature 0 for analytical tasks to minimize "hallucinations"
36
- try:
37
- llm = ChatGoogleGenerativeAI(
38
- model="gemini-2.5-flash",
39
- google_api_key=GEMINI_API_KEY,
40
- temperature=0,
41
- )
42
- except Exception as e:
43
- st.error(f"Failed to initialize LLM: {e}")
44
- st.stop()
45
-
46
- # 2. File Upload
47
- uploaded_file = st.file_uploader("Upload your dataset (CSV)", type="csv")
48
 
49
  if uploaded_file:
50
- df = pd.read_csv(uploaded_file)
 
 
 
 
 
51
 
52
- with st.expander("📄 Data Preview & Schema"):
53
- col1, col2 = st.columns(2)
54
- with col1:
55
- st.write("**First 5 Rows:**")
56
- st.dataframe(df.head())
57
- with col2:
58
- st.write("**Column Info:**")
59
- st.write(df.dtypes)
60
 
61
- # 3. User Input
62
- query = st.text_area(
63
- "What would you like to know?",
64
- placeholder="e.g., 'What is the correlation between age and salary?' or 'Plot a histogram of sales.'"
65
- )
66
 
67
- if st.button("Run Analysis") and query:
68
- # 4. Create the Agent
69
- # create_pandas_dataframe_agent wraps the dataframe and the python tool
 
 
 
 
 
 
 
70
  agent = create_pandas_dataframe_agent(
71
  llm,
72
  df,
73
  verbose=True,
74
- agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
75
- allow_dangerous_code=True, # Necessary to execute Python code
76
  handle_parsing_errors=True
77
  )
78
 
79
- # 5. Execute with Streamlit Callbacks
80
- # This allows us to see the "Thinking" process in the UI
81
- st.subheader("🧠 Agent Thought Process")
82
 
83
- # This container will show the step-by-step reasoning
84
  thought_container = st.container()
85
  st_callback = StreamlitCallbackHandler(thought_container)
86
 
87
- with st.spinner("Agent is analyzing..."):
88
  try:
89
- # The .run() method triggers the agentic loop
90
  response = agent.run(query, callbacks=[st_callback])
91
 
92
  st.markdown("---")
93
- st.subheader("✅ Final Answer")
94
  st.success(response)
95
 
96
- # Note on Plots:
97
- # If the agent uses plt.show(), it might not render in Streamlit.
98
- # Standard practice for agents is to ask them to use st.pyplot(plt.gcf())
99
- # but the agent often figures out how to display data.
100
-
101
  except Exception as e:
102
- st.error(f"The agent encountered a critical error: {e}")
103
- st.info("Tip: Try rephrasing your question or checking if column names are clear.")
104
-
105
  else:
106
- st.info("👆 Please upload a CSV file to begin.")
107
-
108
- # --- Sidebar Credits & Info ---
109
- with st.sidebar:
110
- st.header("How it works")
111
- st.markdown("""
112
- **1. Thought:** The LLM analyzes your question and the dataframe schema.
113
-
114
- **2. Action:** It writes and executes Python code using `pandas`.
115
-
116
- **3. Observation:** It looks at the output of that code.
117
-
118
- **4. Final Answer:** If the output satisfies the question, it responds. Otherwise, it loops back to step 1.
119
- """)
120
-
121
- if st.button("Clear Cache"):
122
- st.cache_data.clear()
123
- st.rerun()
124
 
125
  if __name__ == "__main__":
126
  main()
 
4
  import matplotlib.pyplot as plt
5
  import seaborn as sns
6
 
7
+ # Using stable, modern imports to avoid version conflicts
8
  from langchain_google_genai import ChatGoogleGenerativeAI
9
+ from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
 
10
  from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
11
 
12
+ # --- 1. PAGE SETUP ---
13
  st.set_page_config(
14
  page_title="Agentic Data Analyst",
15
  page_icon="📊",
16
  layout="wide"
17
  )
18
 
19
+ # Fetch API Key from environment
20
  GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
21
 
22
  def main():
23
+ st.title("🤖 Agentic Data Analyst")
24
  st.markdown("""
25
+ This agent follows an **agentic workflow**: it reasons about your question, writes Python code,
26
+ observes the output, and self-corrects if it encounters errors.
27
  """)
28
 
29
+ # Check for API Key
30
  if not GEMINI_API_KEY:
31
+ st.error("❌ Missing `GEMINI_API_KEY`. Please set it as an environment variable or in Streamlit Secrets.")
32
  st.stop()
33
 
34
+ # --- 2. DATA LOADING ---
35
+ uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  if uploaded_file:
38
+ # Load and cache for performance
39
+ @st.cache_data
40
+ def load_data(file):
41
+ return pd.read_csv(file)
42
+
43
+ df = load_data(uploaded_file)
44
 
45
+ with st.expander("📄 Data Overview"):
46
+ st.dataframe(df.head())
47
+ st.info(f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns.")
 
 
 
 
 
48
 
49
+ # --- 3. AGENT CONFIGURATION ---
50
+ query = st.text_area("What analysis would you like to perform?", placeholder="e.g., 'Analyze the relationship between x and y and show a scatter plot.'")
 
 
 
51
 
52
+ if st.button("Run Agent") and query:
53
+ # Initialize the LLM (using Gemini 2.5 Flash for speed/reasoning balance)
54
+ llm = ChatGoogleGenerativeAI(
55
+ model="gemini-2.5-flash-preview-09-2025",
56
+ google_api_key=GEMINI_API_KEY,
57
+ temperature=0, # Crucial for deterministic data analysis
58
+ )
59
+
60
+ # Create the Pandas Agent
61
+ # Using the string identifier 'zero-shot-react-description' avoids import errors
62
  agent = create_pandas_dataframe_agent(
63
  llm,
64
  df,
65
  verbose=True,
66
+ agent_type="zero-shot-react-description",
67
+ allow_dangerous_code=True, # Required to execute Python on the dataframe
68
  handle_parsing_errors=True
69
  )
70
 
71
+ # --- 4. EXECUTION WITH VISUAL CALLBACKS ---
72
+ st.subheader("🧠 Reasoning & Execution")
 
73
 
74
+ # This container allows the user to see the agent's step-by-step thinking
75
  thought_container = st.container()
76
  st_callback = StreamlitCallbackHandler(thought_container)
77
 
78
+ with st.spinner("Agent is working..."):
79
  try:
80
+ # Execute the loop
81
  response = agent.run(query, callbacks=[st_callback])
82
 
83
  st.markdown("---")
84
+ st.subheader("✅ Final Analysis Result")
85
  st.success(response)
86
 
 
 
 
 
 
87
  except Exception as e:
88
+ st.error(f"Agent failed to complete the task: {e}")
89
+ st.info("Try rephrasing your query or checking if the column names are easy for the AI to understand.")
 
90
  else:
91
+ st.info("👆 Upload a CSV to begin the agentic session.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  if __name__ == "__main__":
94
  main()