sasa-agent-2

Sleeping

App Files Files Community

sanjaystarc committed on Dec 14, 2025

Commit

aedecb3

verified ·

1 Parent(s): cc826f9

Update app.py

Browse files

Files changed (1) hide show

app.py +85 -73

app.py CHANGED Viewed

@@ -2,19 +2,42 @@ import os
 import gradio as gr
 import requests
 import pandas as pd
-from smolagents import CodeAgent
 from smolagents.models import LiteLLMModel
-# --------------------------------------------------
-# CONSTANTS (DO NOT CHANGE)
-# --------------------------------------------------
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
-# --------------------------------------------------
-# AGENT (CONSERVATIVE – EXACT MATCH SAFE)
-# --------------------------------------------------
 class BasicAgent:
     def __init__(self):
@@ -22,46 +45,54 @@ class BasicAgent:
             model_id="huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
         )
-        # ❗ No tools on purpose (prevents paraphrasing)
         self.agent = CodeAgent(
-            tools=[],
             model=model,
             instructions=(
-                "You are answering GAIA Level-1 benchmark questions.\n"
                 "Rules:\n"
-                "- Answer ONLY if you are completely certain.\n"
-                "- Output ONLY the exact answer.\n"
-                "- Use the shortest possible answer.\n"
-                "- Do NOT explain.\n"
-                "- Do NOT paraphrase.\n"
-                "- Do NOT add extra words.\n"
-                "- If unsure, output exactly: I don't know\n"
             ),
-            max_steps=2
         )
     def __call__(self, question: str) -> str:
         try:
             raw = self.agent.run(question)
             if not raw:
                 return "I don't know"
             answer = raw.strip()
             answer = answer.replace("\n", " ")
-            answer = answer.strip(" .,:;\"'")
-            # HARD FILTER: GAIA exact-match protection
             if len(answer.split()) > 4:
                 return "I don't know"
             return answer
         except Exception:
             return "I don't know"
-# --------------------------------------------------
-# MAIN EVALUATION + SUBMISSION LOGIC
-# --------------------------------------------------
 def run_and_submit_all(profile: gr.OAuthProfile | None):
@@ -75,95 +106,76 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{DEFAULT_API_URL}/questions"
     submit_url = f"{DEFAULT_API_URL}/submit"
-    # Initialize agent
-    try:
-        agent = BasicAgent()
-    except Exception as e:
-        return f"Error initializing agent: {e}", None
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     # Fetch questions
-    try:
-        response = requests.get(questions_url, timeout=15)
-        response.raise_for_status()
-        questions_data = response.json()
-    except Exception as e:
-        return f"Error fetching questions: {e}", None
-    results_log = []
     answers_payload = []
     for item in questions_data:
-        task_id = item.get("task_id")
-        question_text = item.get("question")
-        if not task_id or question_text is None:
-            continue
-        submitted_answer = agent(question_text)
         answers_payload.append({
             "task_id": task_id,
-            "submitted_answer": submitted_answer
         })
         results_log.append({
             "Task ID": task_id,
-            "Question": question_text,
-            "Submitted Answer": submitted_answer
         })
     submission_data = {
-        "username": username.strip(),
         "agent_code": agent_code,
         "answers": answers_payload
     }
-    try:
-        response = requests.post(submit_url, json=submission_data, timeout=60)
-        response.raise_for_status()
-        result_data = response.json()
-        final_status = (
-            f"Submission Successful!\n"
-            f"User: {result_data.get('username')}\n"
-            f"Score: {result_data.get('score')}% "
-            f"({result_data.get('correct_count')}/{result_data.get('total_attempted')} correct)\n"
-            f"Message: {result_data.get('message')}"
-        )
-        return final_status, pd.DataFrame(results_log)
-    except Exception as e:
-        return f"Submission failed: {e}", pd.DataFrame(results_log)
-# --------------------------------------------------
-# GRADIO UI (DO NOT CHANGE)
-# --------------------------------------------------
 with gr.Blocks() as demo:
     gr.Markdown("# GAIA Level-1 Agent – Final Assignment")
     gr.Markdown(
         """
-        **Instructions**
-        1. Login with your Hugging Face account
-        2. Click the button below
-        3. Wait for evaluation and submission
         """
     )
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
-    status_output = gr.Textbox(label="Submission Result", lines=5)
-    results_table = gr.DataFrame(label="Questions and Agent Answers")
     run_button.click(
         fn=run_and_submit_all,
-        outputs=[status_output, results_table]
     )
 if __name__ == "__main__":

 import gradio as gr
 import requests
 import pandas as pd
+import re
+from smolagents import CodeAgent, tool
 from smolagents.models import LiteLLMModel
+from duckduckgo_search import DDGS
+# ==================================================
+# CONSTANT
+# ==================================================
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+# ==================================================
+# SEARCH TOOL (STRICT)
+# ==================================================
+@tool
+def web_search(query: str) -> str:
+    """
+    Search the web for factual information.
+    Args:
+        query (str): A factual search query.
+    Returns:
+        str: Short factual text from search results.
+    """
+    with DDGS() as ddgs:
+        results = list(ddgs.text(query, max_results=5))
+    if not results:
+        return ""
+    return " ".join(r["body"] for r in results)
+# ==================================================
+# AGENT (HUMAN-LIKE, LEVEL-1 SAFE)
+# ==================================================
 class BasicAgent:
     def __init__(self):
             model_id="huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
         )
         self.agent = CodeAgent(
+            tools=[web_search],
             model=model,
             instructions=(
+                "You answer GAIA Level-1 questions.\n"
+                "Process:\n"
+                "1. Use search if needed.\n"
+                "2. Extract ONLY the short factual answer.\n"
                 "Rules:\n"
+                "- Output ONLY the answer.\n"
+                "- No explanation.\n"
+                "- No full sentences.\n"
+                "- If unclear, output: I don't know\n"
             ),
+            max_steps=3
         )
     def __call__(self, question: str) -> str:
         try:
             raw = self.agent.run(question)
             if not raw:
                 return "I don't know"
             answer = raw.strip()
             answer = answer.replace("\n", " ")
+            # Remove common filler words humans add
+            answer = re.sub(r"^(the|a|an)\s+", "", answer, flags=re.I)
+            # Remove punctuation
+            answer = answer.strip(" .,:;\"'()")
+            # GAIA answers are SHORT
             if len(answer.split()) > 4:
                 return "I don't know"
+            # Avoid explanations
+            if any(x in answer.lower() for x in [" is ", " was ", " are "]):
+                return "I don't know"
             return answer
         except Exception:
             return "I don't know"
+# ==================================================
+# RUN + SUBMIT (TEMPLATE LOGIC – UNCHANGED)
+# ==================================================
 def run_and_submit_all(profile: gr.OAuthProfile | None):
     questions_url = f"{DEFAULT_API_URL}/questions"
     submit_url = f"{DEFAULT_API_URL}/submit"
+    agent = BasicAgent()
     agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
     # Fetch questions
+    response = requests.get(questions_url, timeout=15)
+    questions_data = response.json()
     answers_payload = []
+    results_log = []
     for item in questions_data:
+        task_id = item["task_id"]
+        question = item["question"]
+        answer = agent(question)
         answers_payload.append({
             "task_id": task_id,
+            "submitted_answer": answer
         })
         results_log.append({
             "Task ID": task_id,
+            "Question": question,
+            "Submitted Answer": answer
         })
     submission_data = {
+        "username": username,
         "agent_code": agent_code,
         "answers": answers_payload
     }
+    response = requests.post(submit_url, json=submission_data, timeout=60)
+    result = response.json()
+    status = (
+        f"Submission Successful!\n"
+        f"User: {result.get('username')}\n"
+        f"Score: {result.get('score')}% "
+        f"({result.get('correct_count')}/{result.get('total_attempted')} correct)\n"
+        f"Message: {result.get('message')}"
+    )
+    return status, pd.DataFrame(results_log)
+# ==================================================
+# GRADIO UI
+# ==================================================
 with gr.Blocks() as demo:
     gr.Markdown("# GAIA Level-1 Agent – Final Assignment")
     gr.Markdown(
         """
+        1. Login with Hugging Face
+        2. Click run
+        3. Wait for submission
         """
     )
     gr.LoginButton()
     run_button = gr.Button("Run Evaluation & Submit All Answers")
+    status_output = gr.Textbox(label="Submission Result", lines=6)
+    table_output = gr.DataFrame(label="Questions & Answers")
     run_button.click(
         fn=run_and_submit_all,
+        outputs=[status_output, table_output]
     )
 if __name__ == "__main__":