likki1715 committed on
Commit
5a57fa7
·
verified ·
1 Parent(s): 4df03d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -120
app.py CHANGED
@@ -5,80 +5,61 @@ import requests
5
  import pandas as pd
6
  import re
7
 
8
- # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Tool Implementations ---
12
 
13
  def download_and_read_task_file(task_id: str):
14
- """Downloads file and immediately reads its content. Returns (filename, content_str)."""
15
  url = f"{DEFAULT_API_URL}/files/{task_id}"
16
  try:
17
  response = requests.get(url, timeout=15)
18
  if response.status_code != 200:
19
  return None, ""
20
-
21
  cd = response.headers.get('content-disposition', '')
22
  filename = f"file_{task_id[:8]}.tmp"
23
  match = re.search(r'filename="?([^"]+)"?', cd)
24
  if match:
25
  filename = match.group(1)
26
-
27
  with open(filename, 'wb') as f:
28
  f.write(response.content)
29
-
30
  print(f" [File downloaded: {filename}]")
31
-
32
- # Try to read content immediately based on file type
33
  ext = filename.lower().split('.')[-1]
34
-
35
  if ext in ['xlsx', 'xls']:
36
  try:
37
- df = pd.read_excel(filename, sheet_name=None)
38
  content = ""
39
- for sheet, data in df.items():
40
  content += f"Sheet: {sheet}\n{data.to_string()}\n\n"
41
- return filename, content[:3000]
42
  except Exception as e:
43
  return filename, f"Excel read error: {e}"
44
-
45
  elif ext == 'py':
46
  try:
47
  with open(filename, 'r', encoding='utf-8') as f:
48
  return filename, f.read()
49
  except Exception as e:
50
  return filename, f"Python file read error: {e}"
51
-
52
  elif ext in ['txt', 'csv', 'json', 'md']:
53
  try:
54
  with open(filename, 'r', encoding='utf-8') as f:
55
- return filename, f.read()[:3000]
56
  except Exception as e:
57
  return filename, f"Text read error: {e}"
58
-
59
  elif ext in ['mp3', 'wav', 'ogg', 'm4a']:
60
- # Audio - try whisper if available, else note it
61
  try:
62
  import whisper
63
  model = whisper.load_model("tiny")
64
  result = model.transcribe(filename)
65
  return filename, f"Audio transcript: {result['text']}"
66
  except Exception:
67
- return filename, f"Audio file '{filename}' downloaded but cannot be transcribed (no whisper). File size: {len(response.content)} bytes."
68
-
69
- elif ext in ['png', 'jpg', 'jpeg', 'gif', 'webp']:
70
- return filename, f"Image file '{filename}' downloaded. Size: {len(response.content)} bytes. Cannot read image content directly."
71
-
72
  else:
73
- # Try text first, fall back to binary
74
  try:
75
  with open(filename, 'r', encoding='utf-8') as f:
76
- return filename, f.read()[:3000]
77
  except Exception:
78
- return filename, f"Binary file '{filename}' downloaded. Size: {len(response.content)} bytes."
79
-
80
  except Exception as e:
81
- print(f" File download error for {task_id}: {e}")
82
  return None, ""
83
 
84
 
@@ -93,7 +74,7 @@ def web_search(query: str) -> str:
93
  for r in results:
94
  output.append(f"Title: {r.get('title','')}\nURL: {r.get('href','')}\nSnippet: {r.get('body','')[:300]}")
95
  return "\n---\n".join(output)
96
- except ImportError:
97
  try:
98
  from duckduckgo_search import DDGS
99
  with DDGS() as ddgs:
@@ -106,8 +87,6 @@ def web_search(query: str) -> str:
106
  return "\n---\n".join(output)
107
  except Exception as e:
108
  return f"Search error: {e}"
109
- except Exception as e:
110
- return f"Search error: {e}"
111
 
112
 
113
  def web_fetch(url: str) -> str:
@@ -122,9 +101,9 @@ def web_fetch(url: str) -> str:
122
  tag.decompose()
123
  text = soup.get_text(separator="\n", strip=True)
124
  text = re.sub(r'\n{3,}', '\n\n', text)
125
- return text[:1500]
126
  except ImportError:
127
- return response.text[:1500]
128
  except Exception as e:
129
  return f"Fetch error: {e}"
130
 
@@ -148,7 +127,7 @@ def wikipedia_search(query: str) -> str:
148
  pages = summary_data.get("query", {}).get("pages", {})
149
  for page_id, page in pages.items():
150
  extract = page.get("extract", "No content available.")
151
- return f"Wikipedia: {title}\n\n{extract[:1500]}"
152
  return "No content found."
153
  except Exception as e:
154
  return f"Wikipedia error: {e}"
@@ -163,7 +142,7 @@ def run_python(code: str) -> str:
163
  exec_globals = {}
164
  exec(code, exec_globals)
165
  output = sys.stdout.getvalue()
166
- return output[:1000] if output else "Code ran but printed nothing. Use print() to see data."
167
  except Exception as e:
168
  return f"Python error: {e}"
169
  finally:
@@ -180,70 +159,63 @@ class SmartAgent:
180
  print(f"SmartAgent initialized with Groq ({self.model})")
181
 
182
  def call_llm(self, prompt: str) -> str:
183
- if len(prompt) > 8000:
184
- prompt = prompt[:3000] + "\n\n[...context trimmed...]\n\n" + prompt[-3000:]
185
-
186
- headers = {
187
- "Authorization": f"Bearer {self.api_key}",
188
- "Content-Type": "application/json"
189
- }
190
  payload = {
191
  "model": self.model,
192
  "messages": [{"role": "user", "content": prompt}],
193
- "temperature": 0.1,
194
  "max_tokens": 512
195
  }
196
-
197
- wait_times = [20, 40, 80]
198
  for attempt, wait_time in enumerate(wait_times):
199
  try:
200
  response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
201
  response.raise_for_status()
202
- data = response.json()
203
- return data["choices"][0]["message"]["content"].strip()
204
  except requests.exceptions.HTTPError as e:
205
- status = response.status_code
206
- if status in [429, 503, 500]:
207
- print(f"Groq API Error ({status})! Waiting {wait_time}s... (Attempt {attempt+1}/3)")
208
  time.sleep(wait_time)
209
  else:
210
  raise e
211
-
212
  raise Exception("Failed after 3 attempts.")
213
 
214
  def __call__(self, question: str, task_id: str) -> str:
215
  print(f"\nQuestion: {question[:100]}...")
216
 
217
- # Download and immediately read any attached file
218
  filename, file_content = download_and_read_task_file(task_id)
219
 
220
  file_context = ""
221
  if filename and file_content:
222
- file_context = f"\n\n[ATTACHED FILE: '{filename}']\n{file_content}\n[END OF FILE]"
223
- elif filename:
224
- file_context = f"\n\n[ATTACHED FILE: '{filename}' - could not read content]"
225
 
226
- system = """You are an expert AI solving benchmark questions. Think step by step.
227
 
228
- TOOLS AVAILABLE:
229
- - SEARCH: <query>
230
- - WIKIPEDIA: <query>
231
- - FETCH: <url>
232
- - PYTHON: ```python ... ``` (always use print())
233
-
234
- OUTPUT FORMAT:
235
- THOUGHT: <reasoning>
236
  SEARCH: <query>
237
-
238
- Or when done:
239
- ANSWER: <exact answer>
240
-
241
- STRICT RULES:
242
- - If file content is provided above, use it directly - DO NOT re-read it
243
- - Answer must be exact: numbers only (no units unless asked), short phrases
244
- - No articles (a/the), no commas in numbers
245
- - Comma-separated list if multiple items needed
246
- - Do NOT say "Unable to determine" - always give your best guess"""
 
 
 
 
 
 
 
 
 
 
 
247
 
248
  history = []
249
  initial_prompt = f"{system}\n\nQuestion: {question}{file_context}"
@@ -254,26 +226,31 @@ STRICT RULES:
254
  if not history:
255
  prompt = initial_prompt
256
  else:
257
- recent = history[-3:]
258
  exchanges = "\n\n".join([
259
- f"Step {i+1}:\nAction: {h['action']}\nResult: {h['result'][:400]}"
260
  for i, h in enumerate(recent)
261
  ])
262
- prompt = f"{system}\n\nQuestion: {question}{file_context}\n\nPrevious steps:\n{exchanges}\n\nContinue:"
263
 
264
  response = self.call_llm(prompt)
265
- print(f" LLM [{iteration}]: {response[:200]}...")
266
 
 
267
  fetch_match = re.search(r'FETCH:\s*(https?://\S+)', response)
268
  search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
269
  wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
270
  python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
271
- answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
272
 
273
  if answer_match:
274
  answer = answer_match.group(1).strip()
275
  print(f" Final Answer: {answer}")
276
  return answer
 
 
 
 
 
277
  elif fetch_match:
278
  url = fetch_match.group(1).strip()
279
  print(f" Tool: FETCH({url[:80]})")
@@ -289,43 +266,33 @@ STRICT RULES:
289
  print(f" Tool: WIKIPEDIA({query})")
290
  result = wikipedia_search(query)
291
  history.append({"action": f"WIKIPEDIA: {query}", "result": result})
292
- elif python_match:
293
- code = python_match.group(1).strip()
294
- print(f" Tool: PYTHON({code[:60]}...)")
295
- result = run_python(code)
296
- history.append({"action": f"PYTHON: {code[:100]}", "result": result})
297
  else:
298
- history.append({"action": "none", "result": "No valid tool. Use SEARCH, WIKIPEDIA, FETCH, PYTHON, or ANSWER."})
299
-
300
- # Fallback: force a best-guess answer
301
- recent = history[-3:]
302
- exchanges = "\n\n".join([f"Action: {h['action']}\nResult: {h['result'][:300]}" for h in recent])
303
- fallback_prompt = (
304
- f"{system}\n\nQuestion: {question}{file_context}\n\n"
305
- f"Steps taken:\n{exchanges}\n\n"
306
- f"You MUST give a final answer now. Do not say 'unable to determine'. "
307
- f"Use your best judgment. Output ONLY: ANSWER: <answer>"
308
  )
309
- last = self.call_llm(fallback_prompt)
310
- answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
311
- if answer_match:
312
- return answer_match.group(1).strip()
313
  return last.strip().split('\n')[0][:200]
314
 
315
 
316
  def run_and_submit_all(profile: gr.OAuthProfile | None):
317
  space_id = os.getenv("SPACE_ID")
318
-
319
  if profile:
320
- username = f"{profile.username}"
321
  print(f"User logged in: {username}")
322
  else:
323
  return "Please Login to Hugging Face with the button.", None
324
 
325
- api_url = DEFAULT_API_URL
326
- questions_url = f"{api_url}/questions"
327
- submit_url = f"{api_url}/submit"
328
-
329
  try:
330
  agent = SmartAgent()
331
  except Exception as e:
@@ -334,11 +301,9 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
334
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
335
 
336
  try:
337
- response = requests.get(questions_url, timeout=15)
338
  response.raise_for_status()
339
  questions_data = response.json()
340
- if not questions_data:
341
- return "Fetched questions list is empty.", None
342
  print(f"Fetched {len(questions_data)} questions.")
343
  except Exception as e:
344
  return f"Error fetching questions: {e}", None
@@ -351,7 +316,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
351
  question_text = item.get("question")
352
  if not task_id or question_text is None:
353
  continue
354
-
355
  try:
356
  submitted_answer = agent(question_text, task_id)
357
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
@@ -359,7 +323,6 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
359
  except Exception as e:
360
  print(f"Error on task {task_id}: {e}")
361
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
362
-
363
  time.sleep(30)
364
 
365
  if not answers_payload:
@@ -367,7 +330,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
367
 
368
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
369
  try:
370
- response = requests.post(submit_url, json=submission_data, timeout=120)
371
  response.raise_for_status()
372
  result_data = response.json()
373
  final_status = (
@@ -381,8 +344,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
381
  except requests.exceptions.HTTPError as e:
382
  error_detail = f"Server responded with status {e.response.status_code}."
383
  try:
384
- error_json = e.response.json()
385
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
386
  except Exception:
387
  error_detail += f" Response: {e.response.text[:500]}"
388
  return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
@@ -392,16 +354,12 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
392
 
393
  with gr.Blocks() as demo:
394
  gr.Markdown("# 🤖 Smart Agent — GAIA Benchmark Runner")
395
- gr.Markdown(
396
- """
397
  **Powered by Groq (Llama 3.3 70B)**
398
- **Instructions:**
399
- 1. Make sure `GROQ_API_KEY` is set in your Space secrets
400
- 2. `requirements.txt` must include: `gradio`, `requests`, `pandas`, `openpyxl`, `ddgs`, `beautifulsoup4`
401
- 3. Log in with your Hugging Face account below
402
- 4. Click **Run Evaluation & Submit All Answers**
403
- """
404
- )
405
  gr.LoginButton()
406
  run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
407
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
 
5
  import pandas as pd
6
  import re
7
 
 
8
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
9
 
 
10
 
11
  def download_and_read_task_file(task_id: str):
 
12
  url = f"{DEFAULT_API_URL}/files/{task_id}"
13
  try:
14
  response = requests.get(url, timeout=15)
15
  if response.status_code != 200:
16
  return None, ""
 
17
  cd = response.headers.get('content-disposition', '')
18
  filename = f"file_{task_id[:8]}.tmp"
19
  match = re.search(r'filename="?([^"]+)"?', cd)
20
  if match:
21
  filename = match.group(1)
 
22
  with open(filename, 'wb') as f:
23
  f.write(response.content)
 
24
  print(f" [File downloaded: {filename}]")
 
 
25
  ext = filename.lower().split('.')[-1]
 
26
  if ext in ['xlsx', 'xls']:
27
  try:
28
+ df_dict = pd.read_excel(filename, sheet_name=None)
29
  content = ""
30
+ for sheet, data in df_dict.items():
31
  content += f"Sheet: {sheet}\n{data.to_string()}\n\n"
32
+ return filename, content[:4000]
33
  except Exception as e:
34
  return filename, f"Excel read error: {e}"
 
35
  elif ext == 'py':
36
  try:
37
  with open(filename, 'r', encoding='utf-8') as f:
38
  return filename, f.read()
39
  except Exception as e:
40
  return filename, f"Python file read error: {e}"
 
41
  elif ext in ['txt', 'csv', 'json', 'md']:
42
  try:
43
  with open(filename, 'r', encoding='utf-8') as f:
44
+ return filename, f.read()[:4000]
45
  except Exception as e:
46
  return filename, f"Text read error: {e}"
 
47
  elif ext in ['mp3', 'wav', 'ogg', 'm4a']:
 
48
  try:
49
  import whisper
50
  model = whisper.load_model("tiny")
51
  result = model.transcribe(filename)
52
  return filename, f"Audio transcript: {result['text']}"
53
  except Exception:
54
+ return filename, f"Audio file '{filename}' - cannot transcribe without whisper."
 
 
 
 
55
  else:
 
56
  try:
57
  with open(filename, 'r', encoding='utf-8') as f:
58
+ return filename, f.read()[:4000]
59
  except Exception:
60
+ return filename, f"Binary file '{filename}' - {len(response.content)} bytes."
 
61
  except Exception as e:
62
+ print(f" File download error: {e}")
63
  return None, ""
64
 
65
 
 
74
  for r in results:
75
  output.append(f"Title: {r.get('title','')}\nURL: {r.get('href','')}\nSnippet: {r.get('body','')[:300]}")
76
  return "\n---\n".join(output)
77
+ except Exception:
78
  try:
79
  from duckduckgo_search import DDGS
80
  with DDGS() as ddgs:
 
87
  return "\n---\n".join(output)
88
  except Exception as e:
89
  return f"Search error: {e}"
 
 
90
 
91
 
92
  def web_fetch(url: str) -> str:
 
101
  tag.decompose()
102
  text = soup.get_text(separator="\n", strip=True)
103
  text = re.sub(r'\n{3,}', '\n\n', text)
104
+ return text[:2000]
105
  except ImportError:
106
+ return response.text[:2000]
107
  except Exception as e:
108
  return f"Fetch error: {e}"
109
 
 
127
  pages = summary_data.get("query", {}).get("pages", {})
128
  for page_id, page in pages.items():
129
  extract = page.get("extract", "No content available.")
130
+ return f"Wikipedia: {title}\n\n{extract[:2000]}"
131
  return "No content found."
132
  except Exception as e:
133
  return f"Wikipedia error: {e}"
 
142
  exec_globals = {}
143
  exec(code, exec_globals)
144
  output = sys.stdout.getvalue()
145
+ return output[:1500] if output else "Code ran but printed nothing. Add print() statements."
146
  except Exception as e:
147
  return f"Python error: {e}"
148
  finally:
 
159
  print(f"SmartAgent initialized with Groq ({self.model})")
160
 
161
  def call_llm(self, prompt: str) -> str:
162
+ if len(prompt) > 7000:
163
+ prompt = prompt[:3000] + "\n\n[...trimmed...]\n\n" + prompt[-3000:]
164
+ headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
 
 
 
 
165
  payload = {
166
  "model": self.model,
167
  "messages": [{"role": "user", "content": prompt}],
168
+ "temperature": 0.0,
169
  "max_tokens": 512
170
  }
171
+ wait_times = [25, 50, 100]
 
172
  for attempt, wait_time in enumerate(wait_times):
173
  try:
174
  response = requests.post(self.api_url, headers=headers, json=payload, timeout=60)
175
  response.raise_for_status()
176
+ return response.json()["choices"][0]["message"]["content"].strip()
 
177
  except requests.exceptions.HTTPError as e:
178
+ if response.status_code in [429, 503, 500]:
179
+ print(f"Groq Error ({response.status_code})! Waiting {wait_time}s...")
 
180
  time.sleep(wait_time)
181
  else:
182
  raise e
 
183
  raise Exception("Failed after 3 attempts.")
184
 
185
  def __call__(self, question: str, task_id: str) -> str:
186
  print(f"\nQuestion: {question[:100]}...")
187
 
 
188
  filename, file_content = download_and_read_task_file(task_id)
189
 
190
  file_context = ""
191
  if filename and file_content:
192
+ file_context = f"\n\n[FILE '{filename}' CONTENT]:\n{file_content}\n[END FILE]"
 
 
193
 
194
+ system = """You are a precise AI assistant solving benchmark questions with EXACT answers required.
195
 
196
+ TOOLS (use ONE per response):
 
 
 
 
 
 
 
197
  SEARCH: <query>
198
+ WIKIPEDIA: <query>
199
+ FETCH: <full_url>
200
+ PYTHON:
201
+ ```python
202
+ # code here - always use print()
203
+ ```
204
+
205
+ When you have the answer:
206
+ ANSWER: <value>
207
+
208
+ CRITICAL RULES:
209
+ 1. NEVER guess - only answer when you have verified the information from a source
210
+ 2. For reversed/encoded text questions - use PYTHON to decode immediately
211
+ 3. For file questions - the file content is provided above, analyze it with PYTHON
212
+ 4. For math/counting - use PYTHON to compute
213
+ 5. Answer format must be EXACT:
214
+ - Numbers: digits only, no units unless explicitly asked
215
+ - Lists: comma separated, alphabetical if asked, exact spelling
216
+ - Names: exact as found in source
217
+ 6. If you see a URL in the question - FETCH it first
218
+ 7. Do NOT make up data - search for it"""
219
 
220
  history = []
221
  initial_prompt = f"{system}\n\nQuestion: {question}{file_context}"
 
226
  if not history:
227
  prompt = initial_prompt
228
  else:
229
+ recent = history[-4:]
230
  exchanges = "\n\n".join([
231
+ f"Step {i+1}: {h['action']}\nResult: {h['result'][:500]}"
232
  for i, h in enumerate(recent)
233
  ])
234
+ prompt = f"{system}\n\nQuestion: {question}{file_context}\n\nSteps so far:\n{exchanges}\n\nNext step:"
235
 
236
  response = self.call_llm(prompt)
237
+ print(f" LLM [{iteration}]: {response[:250]}...")
238
 
239
+ answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
240
  fetch_match = re.search(r'FETCH:\s*(https?://\S+)', response)
241
  search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
242
  wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
243
  python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
 
244
 
245
  if answer_match:
246
  answer = answer_match.group(1).strip()
247
  print(f" Final Answer: {answer}")
248
  return answer
249
+ elif python_match:
250
+ code = python_match.group(1).strip()
251
+ print(f" Tool: PYTHON")
252
+ result = run_python(code)
253
+ history.append({"action": f"PYTHON: {code[:150]}", "result": result})
254
  elif fetch_match:
255
  url = fetch_match.group(1).strip()
256
  print(f" Tool: FETCH({url[:80]})")
 
266
  print(f" Tool: WIKIPEDIA({query})")
267
  result = wikipedia_search(query)
268
  history.append({"action": f"WIKIPEDIA: {query}", "result": result})
 
 
 
 
 
269
  else:
270
+ history.append({"action": "none", "result": "Use SEARCH, WIKIPEDIA, FETCH, PYTHON, or ANSWER."})
271
+
272
+ # Forced fallback
273
+ recent = history[-4:]
274
+ exchanges = "\n\n".join([f"{h['action']}\n-> {h['result'][:400]}" for h in recent])
275
+ fallback = (
276
+ f"Question: {question}{file_context}\n\n"
277
+ f"Research done:\n{exchanges}\n\n"
278
+ f"Based on the research above, give the single best answer. "
279
+ f"Output ONLY: ANSWER: <answer>"
280
  )
281
+ last = self.call_llm(fallback)
282
+ m = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
283
+ if m:
284
+ return m.group(1).strip()
285
  return last.strip().split('\n')[0][:200]
286
 
287
 
288
  def run_and_submit_all(profile: gr.OAuthProfile | None):
289
  space_id = os.getenv("SPACE_ID")
 
290
  if profile:
291
+ username = profile.username
292
  print(f"User logged in: {username}")
293
  else:
294
  return "Please Login to Hugging Face with the button.", None
295
 
 
 
 
 
296
  try:
297
  agent = SmartAgent()
298
  except Exception as e:
 
301
  agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
302
 
303
  try:
304
+ response = requests.get(f"{DEFAULT_API_URL}/questions", timeout=15)
305
  response.raise_for_status()
306
  questions_data = response.json()
 
 
307
  print(f"Fetched {len(questions_data)} questions.")
308
  except Exception as e:
309
  return f"Error fetching questions: {e}", None
 
316
  question_text = item.get("question")
317
  if not task_id or question_text is None:
318
  continue
 
319
  try:
320
  submitted_answer = agent(question_text, task_id)
321
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
 
323
  except Exception as e:
324
  print(f"Error on task {task_id}: {e}")
325
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
 
326
  time.sleep(30)
327
 
328
  if not answers_payload:
 
330
 
331
  submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
332
  try:
333
+ response = requests.post(f"{DEFAULT_API_URL}/submit", json=submission_data, timeout=120)
334
  response.raise_for_status()
335
  result_data = response.json()
336
  final_status = (
 
344
  except requests.exceptions.HTTPError as e:
345
  error_detail = f"Server responded with status {e.response.status_code}."
346
  try:
347
+ error_detail += f" Detail: {e.response.json().get('detail', e.response.text)}"
 
348
  except Exception:
349
  error_detail += f" Response: {e.response.text[:500]}"
350
  return f"Submission Failed: {error_detail}", pd.DataFrame(results_log)
 
354
 
355
  with gr.Blocks() as demo:
356
  gr.Markdown("# 🤖 Smart Agent — GAIA Benchmark Runner")
357
+ gr.Markdown("""
 
358
  **Powered by Groq (Llama 3.3 70B)**
359
+ 1. Set `GROQ_API_KEY` in Space secrets
360
+ 2. `requirements.txt`: `gradio requests pandas openpyxl ddgs beautifulsoup4`
361
+ 3. Login and click Run
362
+ """)
 
 
 
363
  gr.LoginButton()
364
  run_button = gr.Button("🚀 Run Evaluation & Submit All Answers", variant="primary")
365
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)