Final_Assignment_Template

Sleeping

App Files Files Community

likki1715 committed on Mar 2

Commit

238a105

verified ·

1 Parent(s): 8c2b20e

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -34

app.py CHANGED Viewed

@@ -11,6 +11,29 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Tool Implementations ---
 def web_search(query: str) -> str:
     try:
         with DDGS() as ddgs:
@@ -24,7 +47,6 @@ def web_search(query: str) -> str:
     except Exception as e:
         return f"Search error: {e}"
 def wikipedia_search(query: str) -> str:
     try:
         search_url = "https://en.wikipedia.org/w/api.php"
@@ -49,7 +71,6 @@ def wikipedia_search(query: str) -> str:
     except Exception as e:
         return f"Wikipedia error: {e}"
 def run_python(code: str) -> str:
     import sys
     from io import StringIO
@@ -59,20 +80,19 @@ def run_python(code: str) -> str:
         exec_globals = {}
         exec(code, exec_globals)
         output = sys.stdout.getvalue()
-        return output if output else "Code executed (no output)."
     except Exception as e:
-        return f"Error: {e}"
     finally:
         sys.stdout = old_stdout
 class SmartAgent:
     def __init__(self):
         self.api_key = os.getenv("GROQ_API_KEY")
         if not self.api_key:
             raise ValueError("GROQ_API_KEY environment variable not set! Please add it to your Space secrets.")
-        # Groq's OpenAI-compatible endpoint
         self.api_url = "https://api.groq.com/openai/v1/chat/completions"
         self.model = "llama-3.3-70b-versatile"
         print(f"SmartAgent initialized with Groq ({self.model})")
@@ -89,7 +109,6 @@ class SmartAgent:
             "max_tokens": 1024
         }
-        # Exponential backoff retry mechanism
         wait_times = [15, 30, 60]
         for attempt, wait_time in enumerate(wait_times):
             try:
@@ -99,7 +118,7 @@ class SmartAgent:
                 return data["choices"][0]["message"]["content"].strip()
             except requests.exceptions.HTTPError as e:
                 status = response.status_code
-                if status == 429 or status == 503:
                     print(f"Groq API Error ({status})! Waiting {wait_time} seconds... (Attempt {attempt+1}/3)")
                     time.sleep(wait_time)
                 else:
@@ -107,36 +126,50 @@ class SmartAgent:
         raise Exception("Failed to call LLM after 3 attempts due to API limits or server errors.")
-    def __call__(self, question: str) -> str:
         print(f"\nQuestion: {question[:100]}...")
-        system = """You are a precise AI assistant solving benchmark questions.
-You can use these tools by outputting exactly:
 SEARCH: <query>
 WIKIPEDIA: <query>
-PYTHON: <code>
-After gathering enough info, give your final answer as:
 ANSWER: <your exact short answer>
-Rules for the answer:
 - DO NOT wrap the answer in "FINAL ANSWER: " or any other text. Output strictly "ANSWER: " followed by the exact answer string.
-- Numbers only (no units unless asked, no commas in numbers)
 - Short phrases (no articles like a/the, no abbreviations for proper nouns)
 - Comma-separated list if multiple items needed
-- Exact match required - be very precise"""
-        conversation = f"{system}\n\nQuestion: {question}"
-        for iteration in range(6):
-            # Groq is fast, but we pause slightly to respect free tier limits
-            time.sleep(2.5)
             response = self.call_llm(conversation)
-            print(f"  LLM [{iteration}]: {response[:200]}")
-            # Check for final answer
             answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
             if answer_match:
                 answer = answer_match.group(1).strip()
@@ -148,6 +181,7 @@ Rules for the answer:
             search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
             wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
             python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
             if not python_match:
                 python_match = re.search(r'PYTHON:\s*(.+?)(?:\nSEARCH|\nWIKIPEDIA|\nANSWER|$)', response, re.DOTALL)
@@ -161,16 +195,15 @@ Rules for the answer:
                 tool_result = f"Wikipedia results for '{query}':\n{wikipedia_search(query)}"
             elif python_match:
                 code = python_match.group(1).strip()
-                print(f"  Tool: python({code[:50]})")
                 tool_result = f"Python output:\n{run_python(code)}"
-            if tool_result:
-                conversation += f"\n\nAssistant: {response}\n\nTool Result: {tool_result}\n\nNow provide your ANSWER: <answer>"
             else:
-                conversation += f"\n\nAssistant: {response}\n\nProvide your final answer as: ANSWER: <answer>"
-        # Final attempt fallback
-        conversation += "\n\nGive only the final answer as: ANSWER: <answer>"
         last = self.call_llm(conversation)
         answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
         if answer_match:
@@ -210,21 +243,23 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     results_log = []
     answers_payload = []
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             continue
         try:
-            submitted_answer = agent(question_text)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
             print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
-        # Pausing between questions to ensure safety against rate limits
-        time.sleep(3)
     if not answers_payload:
         return "Agent did not produce any answers.", pd.DataFrame(results_log)
@@ -262,8 +297,9 @@ with gr.Blocks() as demo:
         **Instructions:**
         1. Make sure `GROQ_API_KEY` is set in your Space secrets
-        2. Log in with your Hugging Face account below
-        3. Click **Run Evaluation & Submit All Answers**
         """
     )
     gr.LoginButton()

 # --- Tool Implementations ---
+def download_task_file(task_id: str) -> str:
+    """Download the file attached to a task, if any, into the working directory.
+
+    Requests ``{DEFAULT_API_URL}/files/{task_id}``. On a 200 response the body is
+    written to a local file whose name comes from the ``Content-Disposition``
+    header when one is usable, otherwise ``file_<first 8 chars of task_id>.tmp``.
+
+    Args:
+        task_id: Identifier of the task whose attachment should be fetched.
+
+    Returns:
+        A prompt snippet naming the saved file, or ``""`` when the response is
+        not 200 or any error occurs (the error is printed, never raised).
+    """
+    url = f"{DEFAULT_API_URL}/files/{task_id}"
+    try:
+        response = requests.get(url, timeout=15)
+        if response.status_code != 200:
+            return ""  # No file attached
+        # Fallback name used when the header is missing or unusable
+        filename = f"file_{task_id[:8]}.tmp"
+        cd = response.headers.get('content-disposition')
+        if cd:
+            # Quoted form may contain ';'; unquoted form ends at the next parameter
+            match = re.search(r'filename=(?:"([^"]+)"|([^";]+))', cd)
+            if match:
+                raw_name = (match.group(1) or match.group(2)).strip()
+                # The header is server-controlled: keep only the final path
+                # component so values such as "../../x" or "/etc/x" cannot
+                # escape the working directory.
+                candidate = os.path.basename(raw_name.replace("\\", "/"))
+                if candidate and candidate not in (".", ".."):
+                    filename = candidate
+        # Save the file locally
+        # NOTE(review): an attachment named like an existing local file would
+        # still overwrite it -- confirm whether a dedicated download dir is wanted.
+        with open(filename, 'wb') as f:
+            f.write(response.content)
+        return f"\n[System Alert: A file required for this task was downloaded as '{filename}'. Use your PYTHON tool to read and analyze it.]\n"
+    except Exception as e:
+        # Best-effort: a failed download must not abort the question
+        print(f"File download error for {task_id}: {e}")
+        return ""
 def web_search(query: str) -> str:
     try:
         with DDGS() as ddgs:
     except Exception as e:
         return f"Search error: {e}"
 def wikipedia_search(query: str) -> str:
     try:
         search_url = "https://en.wikipedia.org/w/api.php"
     except Exception as e:
         return f"Wikipedia error: {e}"
 def run_python(code: str) -> str:
     import sys
     from io import StringIO
         exec_globals = {}
         exec(code, exec_globals)
         output = sys.stdout.getvalue()
+        # Remind the agent to use print() if it forgot
+        return output if output else "Code executed successfully, but printed nothing. Use print() to see data."
     except Exception as e:
+        return f"Python execution error: {e}"
     finally:
         sys.stdout = old_stdout
 class SmartAgent:
     def __init__(self):
         self.api_key = os.getenv("GROQ_API_KEY")
         if not self.api_key:
             raise ValueError("GROQ_API_KEY environment variable not set! Please add it to your Space secrets.")
         self.api_url = "https://api.groq.com/openai/v1/chat/completions"
         self.model = "llama-3.3-70b-versatile"
         print(f"SmartAgent initialized with Groq ({self.model})")
             "max_tokens": 1024
         }
         wait_times = [15, 30, 60]
         for attempt, wait_time in enumerate(wait_times):
             try:
                 return data["choices"][0]["message"]["content"].strip()
             except requests.exceptions.HTTPError as e:
                 status = response.status_code
+                if status in [429, 503, 500]:
                     print(f"Groq API Error ({status})! Waiting {wait_time} seconds... (Attempt {attempt+1}/3)")
                     time.sleep(wait_time)
                 else:
         raise Exception("Failed to call LLM after 3 attempts due to API limits or server errors.")
+    def __call__(self, question: str, task_id: str) -> str:
         print(f"\nQuestion: {question[:100]}...")
+        # Download file if it exists and pass the filename to the prompt
+        file_alert = download_task_file(task_id)
+        system = """You are an elite AI assistant solving benchmark questions. You must think step-by-step.
+You have access to a Python environment, Wikipedia, and Web Search.
+You MUST use tools by outputting exactly one of the following formats per step:
+THOUGHT: <your reasoning for what to do next>
 SEARCH: <query>
+Or:
+THOUGHT: <your reasoning>
 WIKIPEDIA: <query>
+Or:
+THOUGHT: <your reasoning>
+PYTHON:
+```python
+<your python code here (make sure to use print() to see outputs)>
+```
+When you are 100% sure you have the final answer, output:
+THOUGHT: <your final reasoning>
 ANSWER: <your exact short answer>
+Rules for the ANSWER:
 - DO NOT wrap the answer in "FINAL ANSWER: " or any other text. Output strictly "ANSWER: " followed by the exact answer string.
+- Numbers only (no units unless asked, no commas in numbers, express money as numbers with decimals)
 - Short phrases (no articles like a/the, no abbreviations for proper nouns)
 - Comma-separated list if multiple items needed
+- Exact match required - be very precise!"""
+        conversation = f"{system}\n\nQuestion: {question}{file_alert}"
+        for iteration in range(7):
+            time.sleep(2.5) # Groq rate limit safety
             response = self.call_llm(conversation)
+            print(f"  LLM [{iteration}]: \n{response[:300]}...\n")
             answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
             if answer_match:
                 answer = answer_match.group(1).strip()
             search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
             wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
             python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
             if not python_match:
                 python_match = re.search(r'PYTHON:\s*(.+?)(?:\nSEARCH|\nWIKIPEDIA|\nANSWER|$)', response, re.DOTALL)
                 tool_result = f"Wikipedia results for '{query}':\n{wikipedia_search(query)}"
             elif python_match:
                 code = python_match.group(1).strip()
+                print(f"  Tool: python({code[:50]}...)")
                 tool_result = f"Python output:\n{run_python(code)}"
             else:
+                tool_result = "No valid tool call detected. Please use SEARCH, WIKIPEDIA, PYTHON, or ANSWER."
+            conversation += f"\n\nAssistant: {response}\n\nTool Result: {tool_result}\n\nNow provide your next THOUGHT and tool, or your ANSWER."
+        # Fallback if it runs out of iterations
+        conversation += "\n\nYou are out of steps. Give only the final answer as: ANSWER: <answer>"
         last = self.call_llm(conversation)
         answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
         if answer_match:
     results_log = []
     answers_payload = []
     for item in questions_data:
         task_id = item.get("task_id")
         question_text = item.get("question")
         if not task_id or question_text is None:
             continue
         try:
+            # We now pass the task_id to the agent so it can download the file!
+            submitted_answer = agent(question_text, task_id)
             answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
         except Exception as e:
             print(f"Error on task {task_id}: {e}")
             results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
+        time.sleep(3) # Safety pause between questions
     if not answers_payload:
         return "Agent did not produce any answers.", pd.DataFrame(results_log)
         **Instructions:**
         1. Make sure `GROQ_API_KEY` is set in your Space secrets
+        2. Make sure `pandas` and `openpyxl` are in your `requirements.txt`
+        3. Log in with your Hugging Face account below
+        4. Click **Run Evaluation & Submit All Answers**
         """
     )
     gr.LoginButton()