data-analysis-agent-2.0

Sleeping

App Files Community

sanjaystarc committed on Dec 25, 2025

Commit

987cfe3

verified ·

1 Parent(s): cec50fe

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -97

app.py CHANGED Viewed

@@ -6,112 +6,106 @@ import seaborn as sns
 import time
 import random
-# Updated LangChain Imports
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
-from langchain_community.callbacks.streamlit import StreamlitCallbackHandler
-# --- 1. PAGE SETUP ---
 st.set_page_config(
-    page_title="Agentic Data Analyst",
-    page_icon="📊",
     layout="wide"
 )
-# Fetch API Key from environment
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 def main():
-    st.title("🤖 Agentic Data Analyst")
     st.markdown("""
-    This agent follows an **agentic workflow**: it reasons, writes code,
-    observes results, and self-corrects.
     """)
     if not GEMINI_API_KEY:
-        st.error("❌ Missing `GEMINI_API_KEY`. Please set it as an environment variable.")
         st.stop()
-    # --- 2. DATA LOADING ---
     uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
     if uploaded_file:
         @st.cache_data
         def load_data(file):
             return pd.read_csv(file)
         df = load_data(uploaded_file)
         with st.expander("📄 Data Overview"):
             st.dataframe(df.head())
-            st.info(f"Dataset contains {df.shape[0]} rows and {df.shape[1]} columns.")
-        # --- 3. AGENT CONFIGURATION ---
-        query = st.text_area("What analysis would you like to perform?",
-                             placeholder="e.g., 'Plot the distribution of prices.'")
         if st.button("Run Agent") and query:
-            # Initialize LLM
             llm = ChatGoogleGenerativeAI(
-                model="gemini-2.5-flash-preview-09-2025",
                 google_api_key=GEMINI_API_KEY,
                 temperature=0,
-                max_retries=6
             )
-            # Add dataframe context manually in prefix
-            df_context = f"The dataframe 'df' has the following columns: {', '.join(df.columns)}"
             custom_prefix = f"""
-            You are a professional Python data analyst working inside a Streamlit+Pandas agent.
-            The dataframe is named `df`.
-           {df_context}
-         🚨 STRICT RULES YOU MUST FOLLOW 🚨
-         # 1 — CODE QUALITY RULES
-         - Code MUST be short, clean, and correct.
-         - NEVER repeat imports. Use ONLY:
-               import matplotlib.pyplot as plt
-               import seaborn as sns
-         - NEVER import streamlit inside the Action Input. (The environment already has `st`.)
-         - NEVER use .copy() unless absolutely necessary.
-         - NEVER print() results — always return plots or data.
-         - NEVER write long multi-step code unless required.
-         # 2 — PLOTTING RULES
-         - Before plotting any filtered dataframe, always check: if filtered.empty:
-         - Always start plots with:
-             plt.figure()
-         - Always end plots with:
-             st.pyplot(plt.gcf())
-        # 3 — OUTPUT FORMAT RULES
-        - If you need to run code → respond ONLY with:
-             Action: python_repl_ast
-             Action Input:
-            <python code only>
-        - If no code is needed → respond ONLY with:
-             Final Answer: <answer>
-        # 4 — WHAT YOU ARE OPTIMIZED FOR
-        - Reliable code
-        - Short code
-        - Zero unnecessary steps
-        - Zero repeated imports
-        - Zero noisy output
-        - Zero markdown
-         Follow these rules EXACTLY.
-            """
-            # IMPORTANT: No suffix → avoids ValueError
             agent = create_pandas_dataframe_agent(
                 llm,
                 df,
                 verbose=True,
-                agent_type="zero-shot-react-description",  # required agent type
                 allow_dangerous_code=True,
                 prefix=custom_prefix,
                 include_df_in_prompt=False,
@@ -119,38 +113,26 @@ def main():
                 agent_executor_kwargs={"handle_parsing_errors": True}
             )
-            # --- 4. EXECUTION ---
             st.subheader("🧠 Reasoning & Execution")
-            thought_container = st.container()
-            st_callback = StreamlitCallbackHandler(thought_container)
-            with st.spinner("Agent is analyzing..."):
-                max_loop_retries = 3
-                for attempt in range(max_loop_retries):
-                    try:
-                        response = agent.run(query, callbacks=[st_callback])
-                        st.markdown("---")
-                        st.subheader("✅ Final Analysis Result")
-                        st.success(response)
-                        break
-                    except Exception as e:
-                        if "429" in str(e) or "RESOURCE_EXHAUSTED" in str(e):
-                            if attempt < max_loop_retries - 1:
-                                wait_time = (2 ** attempt) + random.random()
-                                st.warning(f"Quota reached. Retrying in {wait_time:.2f} seconds...")
-                                time.sleep(wait_time)
-                                continue
-                            else:
-                                st.error("Rate limit exceeded consistently. Try again later.")
-                        else:
-                            st.error("Agent encountered a parsing or execution error.")
-                            with st.expander("Show Technical Error"):
-                                st.code(str(e))
-                            break
     else:
-        st.info("👆 Upload a CSV to begin.")
 if __name__ == "__main__":
     main()

 import time
 import random
+# LangChain + Gemini
 from langchain_google_genai import ChatGoogleGenerativeAI
 from langchain_experimental.agents.agent_toolkits import create_pandas_dataframe_agent
+# --- PAGE SETUP ---
 st.set_page_config(
+    page_title="Agentic Data Analyst",
+    page_icon="📊",
     layout="wide"
 )
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 def main():
+    st.title("🤖 Agentic Data Analyst (Gemini 2.5 Flash)")
     st.markdown("""
+    This agent intelligently analyzes your dataset using an agentic workflow.
+    It writes Python code, executes it, and returns insights.
     """)
     if not GEMINI_API_KEY:
+        st.error("❌ Missing `GEMINI_API_KEY`. Set it as an environment variable.")
         st.stop()
+    # --- CSV UPLOAD ---
     uploaded_file = st.file_uploader("Upload your CSV file", type="csv")
     if uploaded_file:
         @st.cache_data
         def load_data(file):
             return pd.read_csv(file)
         df = load_data(uploaded_file)
         with st.expander("📄 Data Overview"):
             st.dataframe(df.head())
+            st.info(f"Rows: {df.shape[0]} | Columns: {df.shape[1]}")
+        # --- USER QUERY ---
+        query = st.text_area(
+            "What analysis would you like to perform?",
+            placeholder="e.g., Plot Price distribution"
+        )
         if st.button("Run Agent") and query:
+            # --- LLM INIT (Gemini 2.5 Flash / Non-streaming fix) ---
             llm = ChatGoogleGenerativeAI(
+                model="gemini-2.5-flash",
                 google_api_key=GEMINI_API_KEY,
                 temperature=0,
+                max_retries=5,
+                streaming=False                     # IMPORTANT: avoids chunking error
             )
+            # Provide dataframe context
+            df_context = f"The dataframe 'df' has columns: {', '.join(df.columns)}"
+            # --- PREFIX (clean, reliable code rules) ---
             custom_prefix = f"""
+You are a professional Python data analyst running inside a Streamlit + Pandas agent.
+The dataframe is named `df`.
+{df_context}
+🚨 STRICT RULES 🚨
+# 1 — CODE QUALITY
+- Code must be short, clean, correct.
+- Never repeat imports.
+- Only allowed imports inside Action Input:
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+- Never import streamlit.
+- Never print().
+- Never wrap outputs in markdown.
+# 2 — PLOTTING RULES
+- Before plotting filtered data, check if filtered.empty.
+- Always start plots with: plt.figure()
+- Always end plots with: st.pyplot(plt.gcf())
+# 3 — OUTPUT FORMAT
+- If code is required → return ONLY:
+    Action: python_repl_ast
+    Action Input:
+    <python code only>
+- If no code is needed → return ONLY:
+    Final Answer: <answer>
+Follow these rules EXACTLY.
+"""
+            # --- CREATE AGENT ---
             agent = create_pandas_dataframe_agent(
                 llm,
                 df,
                 verbose=True,
+                agent_type="zero-shot-react-description",
                 allow_dangerous_code=True,
                 prefix=custom_prefix,
                 include_df_in_prompt=False,
                 agent_executor_kwargs={"handle_parsing_errors": True}
             )
+            # --- EXECUTION ---
             st.subheader("🧠 Reasoning & Execution")
+            with st.spinner("Agent analyzing..."):
+                try:
+                    # no callback (Gemini 2.5 streaming not supported)
+                    response = agent.run(query)
+                    st.markdown("---")
+                    st.subheader("✅ Final Analysis Result")
+                    st.success(response)
+                except Exception as e:
+                    st.error("Agent encountered an error.")
+                    with st.expander("Show Technical Error"):
+                        st.code(str(e))
     else:
+        st.info("👆 Upload a CSV file to begin.")
 if __name__ == "__main__":
     main()