Final_Assignment_Template

Sleeping

App Files Community

likki1715 committed on Mar 4

Commit

5e36f36

verified ·

1 Parent(s): bda8117

Update app.py

Browse files

Files changed (1) hide show

app.py +80 -96

app.py CHANGED Viewed

@@ -22,24 +22,17 @@ def download_task_file(task_id: str) -> str:
                 match = re.search(r'filename="?([^"]+)"?', cd)
                 if match:
                     filename = match.group(1)
             with open(filename, 'wb') as f:
                 f.write(response.content)
-            # Give the agent explicit instructions on how to read the file
             return (
-                f"\n[SYSTEM: A file '{filename}' has been downloaded for this task. "
-                f"You MUST read it immediately as your FIRST action using:\n"
-                f"PYTHON:\n```python\n"
-                f"# Try reading as text first\n"
                 f"try:\n"
                 f"    with open('{filename}', 'r', encoding='utf-8') as f:\n"
                 f"        print(f.read())\n"
                 f"except:\n"
-                f"    # If binary, read as bytes and show first 2000 chars\n"
                 f"    with open('{filename}', 'rb') as f:\n"
-                f"        data = f.read()\n"
-                f"    print(repr(data[:2000]))\n"
                 f"```\n]\n"
             )
         return ""
@@ -52,24 +45,23 @@ def web_search(query: str) -> str:
     try:
         from ddgs import DDGS
         with DDGS() as ddgs:
-            results = list(ddgs.text(query, max_results=7))
         if not results:
             return "No results found."
         output = []
         for r in results:
-            output.append(f"Title: {r.get('title', '')}\nURL: {r.get('href', '')}\nSnippet: {r.get('body', '')}")
         return "\n---\n".join(output)
     except ImportError:
-        # Fallback to old package name
         try:
             from duckduckgo_search import DDGS
             with DDGS() as ddgs:
-                results = list(ddgs.text(query, max_results=7))
             if not results:
                 return "No results found."
             output = []
             for r in results:
-                output.append(f"Title: {r.get('title', '')}\nURL: {r.get('href', '')}\nSnippet: {r.get('body', '')}")
             return "\n---\n".join(output)
         except Exception as e:
             return f"Search error: {e}"
@@ -83,20 +75,16 @@ def web_fetch(url: str) -> str:
         headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
         response = requests.get(url, timeout=20, headers=headers)
         response.raise_for_status()
-        # Try to parse as HTML
         try:
             from bs4 import BeautifulSoup
             soup = BeautifulSoup(response.text, "html.parser")
-            # Remove scripts and styles
             for tag in soup(["script", "style", "nav", "footer"]):
                 tag.decompose()
             text = soup.get_text(separator="\n", strip=True)
-            # Collapse excessive newlines
             text = re.sub(r'\n{3,}', '\n\n', text)
-            return text[:2000]
         except ImportError:
-            return response.text[:2000]
     except Exception as e:
         return f"Fetch error: {e}"
@@ -120,7 +108,7 @@ def wikipedia_search(query: str) -> str:
         pages = summary_data.get("query", {}).get("pages", {})
         for page_id, page in pages.items():
             extract = page.get("extract", "No content available.")
-            return f"Wikipedia: {title}\n\n{extract[:2000]}"
         return "No content found."
     except Exception as e:
         return f"Wikipedia error: {e}"
@@ -135,9 +123,9 @@ def run_python(code: str) -> str:
         exec_globals = {}
         exec(code, exec_globals)
         output = sys.stdout.getvalue()
-        return output if output else "Code executed successfully, but printed nothing. Use print() to see data."
     except Exception as e:
-        return f"Python execution error: {e}"
     finally:
         sys.stdout = old_stdout
@@ -146,13 +134,16 @@ class SmartAgent:
     def __init__(self):
         self.api_key = os.getenv("GROQ_API_KEY")
         if not self.api_key:
-            raise ValueError("GROQ_API_KEY environment variable not set! Please add it to your Space secrets.")
         self.api_url = "https://api.groq.com/openai/v1/chat/completions"
         self.model = "llama-3.1-8b-instant"
         print(f"SmartAgent initialized with Groq ({self.model})")
     def call_llm(self, prompt: str) -> str:
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json"
@@ -161,7 +152,7 @@ class SmartAgent:
             "model": self.model,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": 0.1,
-            "max_tokens": 1024
         }
         wait_times = [20, 40, 80]
@@ -174,104 +165,97 @@ class SmartAgent:
             except requests.exceptions.HTTPError as e:
                 status = response.status_code
                 if status in [429, 503, 500]:
-                    print(f"Groq API Error ({status})! Waiting {wait_time} seconds... (Attempt {attempt+1}/3)")
                     time.sleep(wait_time)
                 else:
                     raise e
-        raise Exception("Failed to call LLM after 3 attempts due to API limits or server errors.")
     def __call__(self, question: str, task_id: str) -> str:
         print(f"\nQuestion: {question[:100]}...")
         file_alert = download_task_file(task_id)
-        system = """You are an elite AI assistant solving benchmark questions. You must think step-by-step.
-You have access to these tools:
-- SEARCH: Search the web for current information
-- WIKIPEDIA: Search Wikipedia for factual information
-- FETCH: Retrieve and read the full content of a URL
-- PYTHON: Execute Python code (always use print() to see output)
-You MUST use tools by outputting EXACTLY one of these formats per step:
-THOUGHT: <your reasoning>
-SEARCH: <query>
-THOUGHT: <your reasoning>
-WIKIPEDIA: <query>
-THOUGHT: <your reasoning>
-FETCH: <full URL>
-THOUGHT: <your reasoning>
-PYTHON:
-```python
-<your code - always use print() to see results>
-```
-When 100% sure of the answer:
-THOUGHT: <final reasoning>
-ANSWER: <exact answer>
-CRITICAL RULES:
-- If a file was downloaded, READ IT FIRST before doing anything else
-- For URLs in questions, use FETCH to read them directly
-- For Wikipedia pages, use FETCH on the actual Wikipedia URL for full content
-- ANSWER must be exact: numbers only (no units unless asked), short phrases, comma-separated lists
-- Do NOT include "FINAL ANSWER:" prefix - just "ANSWER: " followed by the answer
-- Never give up - always try a different approach if one fails"""
-        conversation = f"{system}\n\nQuestion: {question}{file_alert}"
         for iteration in range(10):
-            time.sleep(8)  # Increased rate limit safety
-            response = self.call_llm(conversation)
-            print(f"  LLM [{iteration}]: \n{response[:300]}...\n")
             # Parse tool calls
             search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
             wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
-            fetch_match = re.search(r'FETCH:\s*(https?://\S+)', response)
             python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
-            if not python_match:
-                python_match = re.search(r'PYTHON:\s*(.+?)(?:\nSEARCH|\nWIKIPEDIA|\nFETCH|\nANSWER|$)', response, re.DOTALL)
-            tool_result = None
             if fetch_match:
                 url = fetch_match.group(1).strip()
-                print(f"  Tool: web_fetch({url[:80]})")
-                tool_result = f"Page content from '{url}':\n{web_fetch(url)}"
             elif search_match:
                 query = search_match.group(1).strip()
-                print(f"  Tool: web_search({query})")
-                tool_result = f"Search results for '{query}':\n{web_search(query)}"
             elif wiki_match:
                 query = wiki_match.group(1).strip()
-                print(f"  Tool: wikipedia({query})")
-                tool_result = f"Wikipedia results for '{query}':\n{wikipedia_search(query)}"
             elif python_match:
                 code = python_match.group(1).strip()
-                print(f"  Tool: python({code[:80]}...)")
-                tool_result = f"Python output:\n{run_python(code)}"
             else:
-                answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
-                if answer_match:
-                    answer = answer_match.group(1).strip()
-                    print(f"  Final Answer: {answer}")
-                    return answer
-                tool_result = "No valid tool call detected. Please use SEARCH, WIKIPEDIA, FETCH, PYTHON, or ANSWER."
-            conversation += f"\n\nAssistant: {response}\n\nTool Result: {tool_result}\n\nNow provide your next THOUGHT and tool, or your ANSWER."
-        # Fallback
-        conversation += "\n\nYou are out of steps. Give ONLY the final answer as: ANSWER: <answer>"
-        last = self.call_llm(conversation)
         answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
         if answer_match:
             return answer_match.group(1).strip()
@@ -325,7 +309,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
             print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
-        time.sleep(15)  # Increased safety pause between questions
     if not answers_payload:
         return "Agent did not produce any answers.", pd.DataFrame(results_log)
@@ -359,10 +343,10 @@ with gr.Blocks() as demo:
     gr.Markdown("# 🤖 Smart Agent — GAIA Benchmark Runner")
     gr.Markdown(
         """
-        **Powered by Groq (Llama 3.3 70B)**
         **Instructions:**
         1. Make sure `GROQ_API_KEY` is set in your Space secrets
-        2. Make sure your `requirements.txt` includes: `gradio`, `requests`, `pandas`, `openpyxl`, `ddgs`, `beautifulsoup4`
         3. Log in with your Hugging Face account below
         4. Click **Run Evaluation & Submit All Answers**
         """
@@ -376,6 +360,6 @@ with gr.Blocks() as demo:
 if __name__ == "__main__":
     print("\n" + "=" * 30 + " Application Startup " + "=" * 30)
     print(f"SPACE_HOST: {os.getenv('SPACE_HOST', 'not set')}")
-    print(f"SPACE_ID: {os.getenv('SPACE_ID', 'not set')}")
     print("=" * 81 + "\n")
     demo.launch(debug=True, share=False)

                 match = re.search(r'filename="?([^"]+)"?', cd)
                 if match:
                     filename = match.group(1)
             with open(filename, 'wb') as f:
                 f.write(response.content)
             return (
+                f"\n[SYSTEM: File '{filename}' downloaded. "
+                f"READ IT FIRST using PYTHON:\n```python\n"
                 f"try:\n"
                 f"    with open('{filename}', 'r', encoding='utf-8') as f:\n"
                 f"        print(f.read())\n"
                 f"except:\n"
                 f"    with open('{filename}', 'rb') as f:\n"
+                f"        print(repr(f.read()[:2000]))\n"
                 f"```\n]\n"
             )
         return ""
     try:
         from ddgs import DDGS
         with DDGS() as ddgs:
+            results = list(ddgs.text(query, max_results=5))
         if not results:
             return "No results found."
         output = []
         for r in results:
+            output.append(f"Title: {r.get('title','')}\nURL: {r.get('href','')}\nSnippet: {r.get('body','')[:200]}")
         return "\n---\n".join(output)
     except ImportError:
         try:
             from duckduckgo_search import DDGS
             with DDGS() as ddgs:
+                results = list(ddgs.text(query, max_results=5))
             if not results:
                 return "No results found."
             output = []
             for r in results:
+                output.append(f"Title: {r.get('title','')}\nURL: {r.get('href','')}\nSnippet: {r.get('body','')[:200]}")
             return "\n---\n".join(output)
         except Exception as e:
             return f"Search error: {e}"
         headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
         response = requests.get(url, timeout=20, headers=headers)
         response.raise_for_status()
         try:
             from bs4 import BeautifulSoup
             soup = BeautifulSoup(response.text, "html.parser")
             for tag in soup(["script", "style", "nav", "footer"]):
                 tag.decompose()
             text = soup.get_text(separator="\n", strip=True)
             text = re.sub(r'\n{3,}', '\n\n', text)
+            return text[:1500]
         except ImportError:
+            return response.text[:1500]
     except Exception as e:
         return f"Fetch error: {e}"
         pages = summary_data.get("query", {}).get("pages", {})
         for page_id, page in pages.items():
             extract = page.get("extract", "No content available.")
+            return f"Wikipedia: {title}\n\n{extract[:1500]}"
         return "No content found."
     except Exception as e:
         return f"Wikipedia error: {e}"
         exec_globals = {}
         exec(code, exec_globals)
         output = sys.stdout.getvalue()
+        return output[:1000] if output else "Code ran but printed nothing. Use print() to see data."
     except Exception as e:
+        return f"Python error: {e}"
     finally:
         sys.stdout = old_stdout
     def __init__(self):
         self.api_key = os.getenv("GROQ_API_KEY")
         if not self.api_key:
+            raise ValueError("GROQ_API_KEY not set!")
         self.api_url = "https://api.groq.com/openai/v1/chat/completions"
         self.model = "llama-3.1-8b-instant"
         print(f"SmartAgent initialized with Groq ({self.model})")
     def call_llm(self, prompt: str) -> str:
+        # Hard cap prompt to avoid 413
+        if len(prompt) > 8000:
+            prompt = prompt[:3000] + "\n\n[...context trimmed...]\n\n" + prompt[-3000:]
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json"
             "model": self.model,
             "messages": [{"role": "user", "content": prompt}],
             "temperature": 0.1,
+            "max_tokens": 512
         }
         wait_times = [20, 40, 80]
             except requests.exceptions.HTTPError as e:
                 status = response.status_code
                 if status in [429, 503, 500]:
+                    print(f"Groq API Error ({status})! Waiting {wait_time}s... (Attempt {attempt+1}/3)")
                     time.sleep(wait_time)
                 else:
                     raise e
+        raise Exception("Failed after 3 attempts.")
     def __call__(self, question: str, task_id: str) -> str:
         print(f"\nQuestion: {question[:100]}...")
         file_alert = download_task_file(task_id)
+        system = """You are an expert AI solving benchmark questions. Think step by step.
+TOOLS AVAILABLE:
+- SEARCH: <query>          (web search)
+- WIKIPEDIA: <query>       (Wikipedia lookup)
+- FETCH: <url>             (read a webpage)
+- PYTHON: ```python ... ``` (run code, always use print())
+ANSWER FORMAT: When done, output exactly:
+ANSWER: <your answer>
+RULES:
+- Read any downloaded file FIRST with PYTHON
+- Use FETCH to read URLs mentioned in questions
+- Be concise - one tool per step
+- Numbers: no units unless asked, no commas
+- Short phrases, no articles (a/the)
+- Comma-separated if multiple items needed"""
+        # Keep a short rolling history instead of full conversation
+        history = []
+        initial_prompt = f"{system}\n\nQuestion: {question}{file_alert}"
         for iteration in range(10):
+            time.sleep(8)
+            # Build prompt from system + question + last 3 exchanges only
+            if not history:
+                prompt = initial_prompt
+            else:
+                recent = history[-3:]
+                exchanges = "\n\n".join([
+                    f"Step {i+1}:\nAction: {h['action']}\nResult: {h['result'][:400]}"
+                    for i, h in enumerate(recent)
+                ])
+                prompt = f"{system}\n\nQuestion: {question}{file_alert}\n\nPrevious steps:\n{exchanges}\n\nContinue:"
+            response = self.call_llm(prompt)
+            print(f"  LLM [{iteration}]: {response[:200]}...")
             # Parse tool calls
+            fetch_match = re.search(r'FETCH:\s*(https?://\S+)', response)
             search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
             wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
             python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
+            answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
             if fetch_match:
                 url = fetch_match.group(1).strip()
+                print(f"  Tool: FETCH({url[:80]})")
+                result = web_fetch(url)
+                history.append({"action": f"FETCH: {url}", "result": result})
             elif search_match:
                 query = search_match.group(1).strip()
+                print(f"  Tool: SEARCH({query})")
+                result = web_search(query)
+                history.append({"action": f"SEARCH: {query}", "result": result})
             elif wiki_match:
                 query = wiki_match.group(1).strip()
+                print(f"  Tool: WIKIPEDIA({query})")
+                result = wikipedia_search(query)
+                history.append({"action": f"WIKIPEDIA: {query}", "result": result})
             elif python_match:
                 code = python_match.group(1).strip()
+                print(f"  Tool: PYTHON({code[:60]}...)")
+                result = run_python(code)
+                history.append({"action": f"PYTHON: {code[:100]}", "result": result})
+            elif answer_match:
+                answer = answer_match.group(1).strip()
+                print(f"  Final Answer: {answer}")
+                return answer
             else:
+                history.append({"action": "none", "result": "No valid tool found. Use SEARCH, WIKIPEDIA, FETCH, PYTHON, or ANSWER."})
+        # Fallback: force answer
+        recent = history[-3:]
+        exchanges = "\n\n".join([f"Action: {h['action']}\nResult: {h['result'][:300]}" for h in recent])
+        fallback_prompt = f"{system}\n\nQuestion: {question}\n\nSteps taken:\n{exchanges}\n\nOut of steps. Give ONLY: ANSWER: <answer>"
+        last = self.call_llm(fallback_prompt)
         answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
         if answer_match:
             return answer_match.group(1).strip()
             print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
+        time.sleep(15)
     if not answers_payload:
         return "Agent did not produce any answers.", pd.DataFrame(results_log)
     gr.Markdown("# 🤖 Smart Agent — GAIA Benchmark Runner")
     gr.Markdown(
         """
+        **Powered by Groq (Llama 3.1 8B Instant)**
         **Instructions:**
         1. Make sure `GROQ_API_KEY` is set in your Space secrets
+        2. `requirements.txt` must include: `gradio`, `requests`, `pandas`, `openpyxl`, `ddgs`, `beautifulsoup4`
         3. Log in with your Hugging Face account below
         4. Click **Run Evaluation & Submit All Answers**
         """
 if __name__ == "__main__":
     print("\n" + "=" * 30 + " Application Startup " + "=" * 30)
     print(f"SPACE_HOST: {os.getenv('SPACE_HOST', 'not set')}")
+    print(f"SPACE_ID:   {os.getenv('SPACE_ID', 'not set')}")
     print("=" * 81 + "\n")
     demo.launch(debug=True, share=False)