data-analysis-agent-2.0

Sleeping

App Files Community

sanjaystarc committed on Dec 24, 2025

Commit

ee89673

verified ·

1 Parent(s): 46d74df

Update app.py

Browse files

Files changed (1) hide show

app.py +43 -75

app.py CHANGED Viewed

@@ -4,123 +4,91 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import seaborn as sns
-# LangChain Imports
 from langchain_google_genai import ChatGoogleGenerativeAI
-from langchain_experimental.agents import create_pandas_dataframe_agent
-from langchain.agents import AgentType
 from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
-# --- CONFIG ---
 st.set_page_config(
     page_title="Agentic Data Analyst",
     page_icon="📊",
     layout="wide"
 )
-# Use the API key from environment
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 def main():
-    st.title("🤖 Agentic Data Analyst (LangChain + Gemini)")
     st.markdown("""
-    This agent uses a **ReAct (Reason + Act)** loop. It doesn't just guess code;
-    it executes Python, checks the results, and self-corrects if it encounters errors.
     """)
     if not GEMINI_API_KEY:
-        st.error("❌ GEMINI_API_KEY not found in environment variables.")
         st.stop()
-    # 1. Initialize the Brain (LLM)
-    # We use temperature 0 for analytical tasks to minimize "hallucinations"
-    try:
-        llm = ChatGoogleGenerativeAI(
-            model="gemini-2.5-flash",
-            google_api_key=GEMINI_API_KEY,
-            temperature=0,
-        )
-    except Exception as e:
-        st.error(f"Failed to initialize LLM: {e}")
-        st.stop()
-    # 2. File Upload
-    uploaded_file = st.file_uploader("Upload your dataset (CSV)", type="csv")
     if uploaded_file:
-        df = pd.read_csv(uploaded_file)
-        with st.expander("📄 Data Preview & Schema"):
-            col1, col2 = st.columns(2)
-            with col1:
-                st.write("**First 5 Rows:**")
-                st.dataframe(df.head())
-            with col2:
-                st.write("**Column Info:**")
-                st.write(df.dtypes)
-        # 3. User Input
-        query = st.text_area(
-            "What would you like to know?",
-            placeholder="e.g., 'What is the correlation between age and salary?' or 'Plot a histogram of sales.'"
-        )
-        if st.button("Run Analysis") and query:
-            # 4. Create the Agent
-            # create_pandas_dataframe_agent wraps the dataframe and the python tool
             agent = create_pandas_dataframe_agent(
                 llm,
                 df,
                 verbose=True,
-                agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
-                allow_dangerous_code=True, # Necessary to execute Python code
                 handle_parsing_errors=True
             )
-            # 5. Execute with Streamlit Callbacks
-            # This allows us to see the "Thinking" process in the UI
-            st.subheader("🧠 Agent Thought Process")
-            # This container will show the step-by-step reasoning
             thought_container = st.container()
             st_callback = StreamlitCallbackHandler(thought_container)
-            with st.spinner("Agent is analyzing..."):
                 try:
-                    # The .run() method triggers the agentic loop
                     response = agent.run(query, callbacks=[st_callback])
                     st.markdown("---")
-                    st.subheader("✅ Final Answer")
                     st.success(response)
-                    # Note on Plots:
-                    # If the agent uses plt.show(), it might not render in Streamlit.
-                    # Standard practice for agents is to ask them to use st.pyplot(plt.gcf())
-                    # but the agent often figures out how to display data.
                 except Exception as e:
-                    st.error(f"The agent encountered a critical error: {e}")
-                    st.info("Tip: Try rephrasing your question or checking if column names are clear.")
     else:
-        st.info("👆 Please upload a CSV file to begin.")
-    # --- Sidebar Credits & Info ---
-    with st.sidebar:
-        st.header("How it works")
-        st.markdown("""
-        **1. Thought:** The LLM analyzes your question and the dataframe schema.
-        **2. Action:** It writes and executes Python code using `pandas`.
-        **3. Observation:** It looks at the output of that code.
-        **4. Final Answer:** If the output satisfies the question, it responds. Otherwise, it loops back to step 1.
-        """)
-        if st.button("Clear Cache"):
-            st.cache_data.clear()
-            st.rerun()
 if __name__ == "__main__":
     main()

 import matplotlib.pyplot as plt
 import seaborn as sns
+# Using stable, modern imports to avoid version conflicts
 from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
 from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
+# --- 1. PAGE SETUP ---
 st.set_page_config(
     page_title="Agentic Data Analyst",
     page_icon="📊",
     layout="wide"
 )
+# Fetch API Key from environment
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 def main():
+    st.title("🤖 Agentic Data Analyst")
     st.markdown("""
+    This agent follows an **agentic workflow**: it reasons about your question, writes Python code,
+    observes the output, and self-corrects if it encounters errors.
     """)
+    # Check for API Key
     if not GEMINI_API_KEY:
+        st.error("❌ Missing `GEMINI_API_KEY`. Please set it as an environment variable or in Streamlit Secrets.")
         st.stop()
+    # --- 2. DATA LOADING ---
+    uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
     if uploaded_file:
+        # Load and cache for performance
+        @st.cache_data
+        def load_data(file):
+            return pd.read_csv(file)
+        df = load_data(uploaded_file)
+        with st.expander("📄 Data Overview"):
+            st.dataframe(df.head())
+            st.info(f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns.")
+        # --- 3. AGENT CONFIGURATION ---
+        query = st.text_area("What analysis would you like to perform?", placeholder="e.g., 'Analyze the relationship between x and y and show a scatter plot.'")
+        if st.button("Run Agent") and query:
+            # Initialize the LLM (using Gemini 2.5 Flash for speed/reasoning balance)
+            llm = ChatGoogleGenerativeAI(
+                model="gemini-2.5-flash-preview-09-2025",
+                google_api_key=GEMINI_API_KEY,
+                temperature=0, # Crucial for deterministic data analysis
+            )
+            # Create the Pandas Agent
+            # Using the string identifier 'zero-shot-react-description' avoids import errors
             agent = create_pandas_dataframe_agent(
                 llm,
                 df,
                 verbose=True,
+                agent_type="zero-shot-react-description",
+                allow_dangerous_code=True, # Required to execute Python on the dataframe
                 handle_parsing_errors=True
             )
+            # --- 4. EXECUTION WITH VISUAL CALLBACKS ---
+            st.subheader("🧠 Reasoning & Execution")
+            # This container allows the user to see the agent's step-by-step thinking
             thought_container = st.container()
             st_callback = StreamlitCallbackHandler(thought_container)
+            with st.spinner("Agent is working..."):
                 try:
+                    # Execute the loop
                     response = agent.run(query, callbacks=[st_callback])
                     st.markdown("---")
+                    st.subheader("✅ Final Analysis Result")
                     st.success(response)
                 except Exception as e:
+                    st.error(f"Agent failed to complete the task: {e}")
+                    st.info("Try rephrasing your query or checking if the column names are easy for the AI to understand.")
     else:
+        st.info("👆 Upload a CSV to begin the agentic session.")
 if __name__ == "__main__":
     main()