sanjaystarc committed on
Commit
aedecb3
·
verified ·
1 Parent(s): cc826f9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +85 -73
app.py CHANGED
@@ -2,19 +2,42 @@ import os
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
 
5
 
6
- from smolagents import CodeAgent
7
  from smolagents.models import LiteLLMModel
 
8
 
9
- # --------------------------------------------------
10
- # CONSTANTS (DO NOT CHANGE)
11
- # --------------------------------------------------
12
 
13
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
14
 
15
- # --------------------------------------------------
16
- # AGENT (CONSERVATIVE – EXACT MATCH SAFE)
17
- # --------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  class BasicAgent:
20
  def __init__(self):
@@ -22,46 +45,54 @@ class BasicAgent:
22
  model_id="huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
23
  )
24
 
25
- # ❗ No tools on purpose (prevents paraphrasing)
26
  self.agent = CodeAgent(
27
- tools=[],
28
  model=model,
29
  instructions=(
30
- "You are answering GAIA Level-1 benchmark questions.\n"
 
 
 
31
  "Rules:\n"
32
- "- Answer ONLY if you are completely certain.\n"
33
- "- Output ONLY the exact answer.\n"
34
- "- Use the shortest possible answer.\n"
35
- "- Do NOT explain.\n"
36
- "- Do NOT paraphrase.\n"
37
- "- Do NOT add extra words.\n"
38
- "- If unsure, output exactly: I don't know\n"
39
  ),
40
- max_steps=2
41
  )
42
 
43
  def __call__(self, question: str) -> str:
44
  try:
45
  raw = self.agent.run(question)
46
-
47
  if not raw:
48
  return "I don't know"
49
 
50
  answer = raw.strip()
51
  answer = answer.replace("\n", " ")
52
- answer = answer.strip(" .,:;\"'")
53
 
54
- # HARD FILTER: GAIA exact-match protection
 
 
 
 
 
 
55
  if len(answer.split()) > 4:
56
  return "I don't know"
57
 
 
 
 
 
58
  return answer
 
59
  except Exception:
60
  return "I don't know"
61
 
62
- # --------------------------------------------------
63
- # MAIN EVALUATION + SUBMISSION LOGIC
64
- # --------------------------------------------------
65
 
66
  def run_and_submit_all(profile: gr.OAuthProfile | None):
67
 
@@ -75,95 +106,76 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
75
  questions_url = f"{DEFAULT_API_URL}/questions"
76
  submit_url = f"{DEFAULT_API_URL}/submit"
77
 
78
- # Initialize agent
79
- try:
80
- agent = BasicAgent()
81
- except Exception as e:
82
- return f"Error initializing agent: {e}", None
83
-
84
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
85
 
86
  # Fetch questions
87
- try:
88
- response = requests.get(questions_url, timeout=15)
89
- response.raise_for_status()
90
- questions_data = response.json()
91
- except Exception as e:
92
- return f"Error fetching questions: {e}", None
93
 
94
- results_log = []
95
  answers_payload = []
 
96
 
97
  for item in questions_data:
98
- task_id = item.get("task_id")
99
- question_text = item.get("question")
100
 
101
- if not task_id or question_text is None:
102
- continue
103
-
104
- submitted_answer = agent(question_text)
105
 
106
  answers_payload.append({
107
  "task_id": task_id,
108
- "submitted_answer": submitted_answer
109
  })
110
 
111
  results_log.append({
112
  "Task ID": task_id,
113
- "Question": question_text,
114
- "Submitted Answer": submitted_answer
115
  })
116
 
117
  submission_data = {
118
- "username": username.strip(),
119
  "agent_code": agent_code,
120
  "answers": answers_payload
121
  }
122
 
123
- try:
124
- response = requests.post(submit_url, json=submission_data, timeout=60)
125
- response.raise_for_status()
126
- result_data = response.json()
127
-
128
- final_status = (
129
- f"Submission Successful!\n"
130
- f"User: {result_data.get('username')}\n"
131
- f"Score: {result_data.get('score')}% "
132
- f"({result_data.get('correct_count')}/{result_data.get('total_attempted')} correct)\n"
133
- f"Message: {result_data.get('message')}"
134
- )
135
 
136
- return final_status, pd.DataFrame(results_log)
 
 
 
 
 
 
137
 
138
- except Exception as e:
139
- return f"Submission failed: {e}", pd.DataFrame(results_log)
140
 
141
- # --------------------------------------------------
142
- # GRADIO UI (DO NOT CHANGE)
143
- # --------------------------------------------------
144
 
145
  with gr.Blocks() as demo:
146
  gr.Markdown("# GAIA Level-1 Agent – Final Assignment")
147
 
148
  gr.Markdown(
149
  """
150
- **Instructions**
151
- 1. Login with your Hugging Face account
152
- 2. Click the button below
153
- 3. Wait for evaluation and submission
154
  """
155
  )
156
 
157
  gr.LoginButton()
158
-
159
  run_button = gr.Button("Run Evaluation & Submit All Answers")
160
 
161
- status_output = gr.Textbox(label="Submission Result", lines=5)
162
- results_table = gr.DataFrame(label="Questions and Agent Answers")
163
 
164
  run_button.click(
165
  fn=run_and_submit_all,
166
- outputs=[status_output, results_table]
167
  )
168
 
169
  if __name__ == "__main__":
 
2
  import gradio as gr
3
  import requests
4
  import pandas as pd
5
+ import re
6
 
7
+ from smolagents import CodeAgent, tool
8
  from smolagents.models import LiteLLMModel
9
+ from duckduckgo_search import DDGS
10
 
11
+ # ==================================================
12
+ # CONSTANT
13
+ # ==================================================
14
 
15
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
16
 
17
+ # ==================================================
18
+ # SEARCH TOOL (STRICT)
19
+ # ==================================================
20
+
21
+ @tool
22
+ def web_search(query: str) -> str:
23
+ """
24
+ Search the web for factual information.
25
+
26
+ Args:
27
+ query (str): A factual search query.
28
+
29
+ Returns:
30
+ str: Short factual text from search results.
31
+ """
32
+ with DDGS() as ddgs:
33
+ results = list(ddgs.text(query, max_results=5))
34
+ if not results:
35
+ return ""
36
+ return " ".join(r["body"] for r in results)
37
+
38
+ # ==================================================
39
+ # AGENT (HUMAN-LIKE, LEVEL-1 SAFE)
40
+ # ==================================================
41
 
42
  class BasicAgent:
43
  def __init__(self):
 
45
  model_id="huggingface/meta-llama/Meta-Llama-3-8B-Instruct"
46
  )
47
 
 
48
  self.agent = CodeAgent(
49
+ tools=[web_search],
50
  model=model,
51
  instructions=(
52
+ "You answer GAIA Level-1 questions.\n"
53
+ "Process:\n"
54
+ "1. Use search if needed.\n"
55
+ "2. Extract ONLY the short factual answer.\n"
56
  "Rules:\n"
57
+ "- Output ONLY the answer.\n"
58
+ "- No explanation.\n"
59
+ "- No full sentences.\n"
60
+ "- If unclear, output: I don't know\n"
 
 
 
61
  ),
62
+ max_steps=3
63
  )
64
 
65
  def __call__(self, question: str) -> str:
66
  try:
67
  raw = self.agent.run(question)
 
68
  if not raw:
69
  return "I don't know"
70
 
71
  answer = raw.strip()
72
  answer = answer.replace("\n", " ")
 
73
 
74
+ # Strip a single leading article (the/a/an), case-insensitively
75
+ answer = re.sub(r"^(the|a|an)\s+", "", answer, flags=re.I)
76
+
77
+ # Strip leading/trailing punctuation and parentheses (interior characters are kept)
78
+ answer = answer.strip(" .,:;\"'()")
79
+
80
+ # GAIA answers are SHORT: reject anything longer than 4 words
81
  if len(answer.split()) > 4:
82
  return "I don't know"
83
 
84
+ # Avoid explanations
85
+ if any(x in answer.lower() for x in [" is ", " was ", " are "]):
86
+ return "I don't know"
87
+
88
  return answer
89
+
90
  except Exception:
91
  return "I don't know"
92
 
93
+ # ==================================================
94
+ # RUN + SUBMIT (TEMPLATE LOGIC – ERROR HANDLING REMOVED)
95
+ # ==================================================
96
 
97
  def run_and_submit_all(profile: gr.OAuthProfile | None):
98
 
 
106
  questions_url = f"{DEFAULT_API_URL}/questions"
107
  submit_url = f"{DEFAULT_API_URL}/submit"
108
 
109
+ agent = BasicAgent()
 
 
 
 
 
110
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
111
 
112
  # Fetch questions
113
+ response = requests.get(questions_url, timeout=15)
114
+ questions_data = response.json()
 
 
 
 
115
 
 
116
  answers_payload = []
117
+ results_log = []
118
 
119
  for item in questions_data:
120
+ task_id = item["task_id"]
121
+ question = item["question"]
122
 
123
+ answer = agent(question)
 
 
 
124
 
125
  answers_payload.append({
126
  "task_id": task_id,
127
+ "submitted_answer": answer
128
  })
129
 
130
  results_log.append({
131
  "Task ID": task_id,
132
+ "Question": question,
133
+ "Submitted Answer": answer
134
  })
135
 
136
  submission_data = {
137
+ "username": username,
138
  "agent_code": agent_code,
139
  "answers": answers_payload
140
  }
141
 
142
+ response = requests.post(submit_url, json=submission_data, timeout=60)
143
+ result = response.json()
 
 
 
 
 
 
 
 
 
 
144
 
145
+ status = (
146
+ f"Submission Successful!\n"
147
+ f"User: {result.get('username')}\n"
148
+ f"Score: {result.get('score')}% "
149
+ f"({result.get('correct_count')}/{result.get('total_attempted')} correct)\n"
150
+ f"Message: {result.get('message')}"
151
+ )
152
 
153
+ return status, pd.DataFrame(results_log)
 
154
 
155
+ # ==================================================
156
+ # GRADIO UI
157
+ # ==================================================
158
 
159
  with gr.Blocks() as demo:
160
  gr.Markdown("# GAIA Level-1 Agent – Final Assignment")
161
 
162
  gr.Markdown(
163
  """
164
+ 1. Login with Hugging Face
165
+ 2. Click run
166
+ 3. Wait for submission
 
167
  """
168
  )
169
 
170
  gr.LoginButton()
 
171
  run_button = gr.Button("Run Evaluation & Submit All Answers")
172
 
173
+ status_output = gr.Textbox(label="Submission Result", lines=6)
174
+ table_output = gr.DataFrame(label="Questions & Answers")
175
 
176
  run_button.click(
177
  fn=run_and_submit_all,
178
+ outputs=[status_output, table_output]
179
  )
180
 
181
  if __name__ == "__main__":