likki1715 committed on
Commit
5e36f36
·
verified ·
1 Parent(s): bda8117

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -96
app.py CHANGED
@@ -22,24 +22,17 @@ def download_task_file(task_id: str) -> str:
22
  match = re.search(r'filename="?([^"]+)"?', cd)
23
  if match:
24
  filename = match.group(1)
25
-
26
  with open(filename, 'wb') as f:
27
  f.write(response.content)
28
-
29
- # Give the agent explicit instructions on how to read the file
30
  return (
31
- f"\n[SYSTEM: A file '{filename}' has been downloaded for this task. "
32
- f"You MUST read it immediately as your FIRST action using:\n"
33
- f"PYTHON:\n```python\n"
34
- f"# Try reading as text first\n"
35
  f"try:\n"
36
  f" with open('{filename}', 'r', encoding='utf-8') as f:\n"
37
  f" print(f.read())\n"
38
  f"except:\n"
39
- f" # If binary, read as bytes and show first 2000 chars\n"
40
  f" with open('{filename}', 'rb') as f:\n"
41
- f" data = f.read()\n"
42
- f" print(repr(data[:2000]))\n"
43
  f"```\n]\n"
44
  )
45
  return ""
@@ -52,24 +45,23 @@ def web_search(query: str) -> str:
52
  try:
53
  from ddgs import DDGS
54
  with DDGS() as ddgs:
55
- results = list(ddgs.text(query, max_results=7))
56
  if not results:
57
  return "No results found."
58
  output = []
59
  for r in results:
60
- output.append(f"Title: {r.get('title', '')}\nURL: {r.get('href', '')}\nSnippet: {r.get('body', '')}")
61
  return "\n---\n".join(output)
62
  except ImportError:
63
- # Fallback to old package name
64
  try:
65
  from duckduckgo_search import DDGS
66
  with DDGS() as ddgs:
67
- results = list(ddgs.text(query, max_results=7))
68
  if not results:
69
  return "No results found."
70
  output = []
71
  for r in results:
72
- output.append(f"Title: {r.get('title', '')}\nURL: {r.get('href', '')}\nSnippet: {r.get('body', '')}")
73
  return "\n---\n".join(output)
74
  except Exception as e:
75
  return f"Search error: {e}"
@@ -83,20 +75,16 @@ def web_fetch(url: str) -> str:
83
  headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
84
  response = requests.get(url, timeout=20, headers=headers)
85
  response.raise_for_status()
86
-
87
- # Try to parse as HTML
88
  try:
89
  from bs4 import BeautifulSoup
90
  soup = BeautifulSoup(response.text, "html.parser")
91
- # Remove scripts and styles
92
  for tag in soup(["script", "style", "nav", "footer"]):
93
  tag.decompose()
94
  text = soup.get_text(separator="\n", strip=True)
95
- # Collapse excessive newlines
96
  text = re.sub(r'\n{3,}', '\n\n', text)
97
- return text[:2000]
98
  except ImportError:
99
- return response.text[:2000]
100
  except Exception as e:
101
  return f"Fetch error: {e}"
102
 
@@ -120,7 +108,7 @@ def wikipedia_search(query: str) -> str:
120
  pages = summary_data.get("query", {}).get("pages", {})
121
  for page_id, page in pages.items():
122
  extract = page.get("extract", "No content available.")
123
- return f"Wikipedia: {title}\n\n{extract[:2000]}"
124
  return "No content found."
125
  except Exception as e:
126
  return f"Wikipedia error: {e}"
@@ -135,9 +123,9 @@ def run_python(code: str) -> str:
135
  exec_globals = {}
136
  exec(code, exec_globals)
137
  output = sys.stdout.getvalue()
138
- return output if output else "Code executed successfully, but printed nothing. Use print() to see data."
139
  except Exception as e:
140
- return f"Python execution error: {e}"
141
  finally:
142
  sys.stdout = old_stdout
143
 
@@ -146,13 +134,16 @@ class SmartAgent:
146
  def __init__(self):
147
  self.api_key = os.getenv("GROQ_API_KEY")
148
  if not self.api_key:
149
- raise ValueError("GROQ_API_KEY environment variable not set! Please add it to your Space secrets.")
150
-
151
  self.api_url = "https://api.groq.com/openai/v1/chat/completions"
152
  self.model = "llama-3.1-8b-instant"
153
  print(f"SmartAgent initialized with Groq ({self.model})")
154
 
155
  def call_llm(self, prompt: str) -> str:
 
 
 
 
156
  headers = {
157
  "Authorization": f"Bearer {self.api_key}",
158
  "Content-Type": "application/json"
@@ -161,7 +152,7 @@ class SmartAgent:
161
  "model": self.model,
162
  "messages": [{"role": "user", "content": prompt}],
163
  "temperature": 0.1,
164
- "max_tokens": 1024
165
  }
166
 
167
  wait_times = [20, 40, 80]
@@ -174,104 +165,97 @@ class SmartAgent:
174
  except requests.exceptions.HTTPError as e:
175
  status = response.status_code
176
  if status in [429, 503, 500]:
177
- print(f"Groq API Error ({status})! Waiting {wait_time} seconds... (Attempt {attempt+1}/3)")
178
  time.sleep(wait_time)
179
  else:
180
  raise e
181
 
182
- raise Exception("Failed to call LLM after 3 attempts due to API limits or server errors.")
183
 
184
  def __call__(self, question: str, task_id: str) -> str:
185
  print(f"\nQuestion: {question[:100]}...")
186
 
187
  file_alert = download_task_file(task_id)
188
 
189
- system = """You are an elite AI assistant solving benchmark questions. You must think step-by-step.
190
-
191
- You have access to these tools:
192
- - SEARCH: Search the web for current information
193
- - WIKIPEDIA: Search Wikipedia for factual information
194
- - FETCH: Retrieve and read the full content of a URL
195
- - PYTHON: Execute Python code (always use print() to see output)
196
-
197
- You MUST use tools by outputting EXACTLY one of these formats per step:
198
-
199
- THOUGHT: <your reasoning>
200
- SEARCH: <query>
201
-
202
- THOUGHT: <your reasoning>
203
- WIKIPEDIA: <query>
204
 
205
- THOUGHT: <your reasoning>
206
- FETCH: <full URL>
 
 
 
207
 
208
- THOUGHT: <your reasoning>
209
- PYTHON:
210
- ```python
211
- <your code - always use print() to see results>
212
- ```
213
 
214
- When 100% sure of the answer:
215
- THOUGHT: <final reasoning>
216
- ANSWER: <exact answer>
 
 
 
 
217
 
218
- CRITICAL RULES:
219
- - If a file was downloaded, READ IT FIRST before doing anything else
220
- - For URLs in questions, use FETCH to read them directly
221
- - For Wikipedia pages, use FETCH on the actual Wikipedia URL for full content
222
- - ANSWER must be exact: numbers only (no units unless asked), short phrases, comma-separated lists
223
- - Do NOT include "FINAL ANSWER:" prefix - just "ANSWER: " followed by the answer
224
- - Never give up - always try a different approach if one fails"""
225
-
226
- conversation = f"{system}\n\nQuestion: {question}{file_alert}"
227
 
228
  for iteration in range(10):
229
- time.sleep(8) # Increased rate limit safety
 
 
 
 
 
 
 
 
 
 
 
230
 
231
- response = self.call_llm(conversation)
232
- print(f" LLM [{iteration}]: \n{response[:300]}...\n")
233
 
234
  # Parse tool calls
 
235
  search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
236
  wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
237
- fetch_match = re.search(r'FETCH:\s*(https?://\S+)', response)
238
  python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
239
-
240
- if not python_match:
241
- python_match = re.search(r'PYTHON:\s*(.+?)(?:\nSEARCH|\nWIKIPEDIA|\nFETCH|\nANSWER|$)', response, re.DOTALL)
242
-
243
- tool_result = None
244
 
245
  if fetch_match:
246
  url = fetch_match.group(1).strip()
247
- print(f" Tool: web_fetch({url[:80]})")
248
- tool_result = f"Page content from '{url}':\n{web_fetch(url)}"
 
249
  elif search_match:
250
  query = search_match.group(1).strip()
251
- print(f" Tool: web_search({query})")
252
- tool_result = f"Search results for '{query}':\n{web_search(query)}"
 
253
  elif wiki_match:
254
  query = wiki_match.group(1).strip()
255
- print(f" Tool: wikipedia({query})")
256
- tool_result = f"Wikipedia results for '{query}':\n{wikipedia_search(query)}"
 
257
  elif python_match:
258
  code = python_match.group(1).strip()
259
- print(f" Tool: python({code[:80]}...)")
260
- tool_result = f"Python output:\n{run_python(code)}"
 
 
 
 
 
261
  else:
262
- answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
263
- if answer_match:
264
- answer = answer_match.group(1).strip()
265
- print(f" Final Answer: {answer}")
266
- return answer
267
-
268
- tool_result = "No valid tool call detected. Please use SEARCH, WIKIPEDIA, FETCH, PYTHON, or ANSWER."
269
-
270
- conversation += f"\n\nAssistant: {response}\n\nTool Result: {tool_result}\n\nNow provide your next THOUGHT and tool, or your ANSWER."
271
 
272
- # Fallback
273
- conversation += "\n\nYou are out of steps. Give ONLY the final answer as: ANSWER: <answer>"
274
- last = self.call_llm(conversation)
 
 
275
  answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
276
  if answer_match:
277
  return answer_match.group(1).strip()
@@ -325,7 +309,7 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
325
  print(f"Error on task {task_id}: {e}")
326
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
327
 
328
- time.sleep(15) # Increased safety pause between questions
329
 
330
  if not answers_payload:
331
  return "Agent did not produce any answers.", pd.DataFrame(results_log)
@@ -359,10 +343,10 @@ with gr.Blocks() as demo:
359
  gr.Markdown("# 🤖 Smart Agent — GAIA Benchmark Runner")
360
  gr.Markdown(
361
  """
362
- **Powered by Groq (Llama 3.3 70B)**
363
  **Instructions:**
364
  1. Make sure `GROQ_API_KEY` is set in your Space secrets
365
- 2. Make sure your `requirements.txt` includes: `gradio`, `requests`, `pandas`, `openpyxl`, `ddgs`, `beautifulsoup4`
366
  3. Log in with your Hugging Face account below
367
  4. Click **Run Evaluation & Submit All Answers**
368
  """
@@ -376,6 +360,6 @@ with gr.Blocks() as demo:
376
  if __name__ == "__main__":
377
  print("\n" + "=" * 30 + " Application Startup " + "=" * 30)
378
  print(f"SPACE_HOST: {os.getenv('SPACE_HOST', 'not set')}")
379
- print(f"SPACE_ID: {os.getenv('SPACE_ID', 'not set')}")
380
  print("=" * 81 + "\n")
381
  demo.launch(debug=True, share=False)
 
22
  match = re.search(r'filename="?([^"]+)"?', cd)
23
  if match:
24
  filename = match.group(1)
 
25
  with open(filename, 'wb') as f:
26
  f.write(response.content)
 
 
27
  return (
28
+ f"\n[SYSTEM: File '{filename}' downloaded. "
29
+ f"READ IT FIRST using PYTHON:\n```python\n"
 
 
30
  f"try:\n"
31
  f" with open('{filename}', 'r', encoding='utf-8') as f:\n"
32
  f" print(f.read())\n"
33
  f"except:\n"
 
34
  f" with open('{filename}', 'rb') as f:\n"
35
+ f" print(repr(f.read()[:2000]))\n"
 
36
  f"```\n]\n"
37
  )
38
  return ""
 
45
  try:
46
  from ddgs import DDGS
47
  with DDGS() as ddgs:
48
+ results = list(ddgs.text(query, max_results=5))
49
  if not results:
50
  return "No results found."
51
  output = []
52
  for r in results:
53
+ output.append(f"Title: {r.get('title','')}\nURL: {r.get('href','')}\nSnippet: {r.get('body','')[:200]}")
54
  return "\n---\n".join(output)
55
  except ImportError:
 
56
  try:
57
  from duckduckgo_search import DDGS
58
  with DDGS() as ddgs:
59
+ results = list(ddgs.text(query, max_results=5))
60
  if not results:
61
  return "No results found."
62
  output = []
63
  for r in results:
64
+ output.append(f"Title: {r.get('title','')}\nURL: {r.get('href','')}\nSnippet: {r.get('body','')[:200]}")
65
  return "\n---\n".join(output)
66
  except Exception as e:
67
  return f"Search error: {e}"
 
75
  headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
76
  response = requests.get(url, timeout=20, headers=headers)
77
  response.raise_for_status()
 
 
78
  try:
79
  from bs4 import BeautifulSoup
80
  soup = BeautifulSoup(response.text, "html.parser")
 
81
  for tag in soup(["script", "style", "nav", "footer"]):
82
  tag.decompose()
83
  text = soup.get_text(separator="\n", strip=True)
 
84
  text = re.sub(r'\n{3,}', '\n\n', text)
85
+ return text[:1500]
86
  except ImportError:
87
+ return response.text[:1500]
88
  except Exception as e:
89
  return f"Fetch error: {e}"
90
 
 
108
  pages = summary_data.get("query", {}).get("pages", {})
109
  for page_id, page in pages.items():
110
  extract = page.get("extract", "No content available.")
111
+ return f"Wikipedia: {title}\n\n{extract[:1500]}"
112
  return "No content found."
113
  except Exception as e:
114
  return f"Wikipedia error: {e}"
 
123
  exec_globals = {}
124
  exec(code, exec_globals)
125
  output = sys.stdout.getvalue()
126
+ return output[:1000] if output else "Code ran but printed nothing. Use print() to see data."
127
  except Exception as e:
128
+ return f"Python error: {e}"
129
  finally:
130
  sys.stdout = old_stdout
131
 
 
134
  def __init__(self):
135
  self.api_key = os.getenv("GROQ_API_KEY")
136
  if not self.api_key:
137
+ raise ValueError("GROQ_API_KEY not set!")
 
138
  self.api_url = "https://api.groq.com/openai/v1/chat/completions"
139
  self.model = "llama-3.1-8b-instant"
140
  print(f"SmartAgent initialized with Groq ({self.model})")
141
 
142
  def call_llm(self, prompt: str) -> str:
143
+ # Hard cap prompt to avoid 413
144
+ if len(prompt) > 8000:
145
+ prompt = prompt[:3000] + "\n\n[...context trimmed...]\n\n" + prompt[-3000:]
146
+
147
  headers = {
148
  "Authorization": f"Bearer {self.api_key}",
149
  "Content-Type": "application/json"
 
152
  "model": self.model,
153
  "messages": [{"role": "user", "content": prompt}],
154
  "temperature": 0.1,
155
+ "max_tokens": 512
156
  }
157
 
158
  wait_times = [20, 40, 80]
 
165
  except requests.exceptions.HTTPError as e:
166
  status = response.status_code
167
  if status in [429, 503, 500]:
168
+ print(f"Groq API Error ({status})! Waiting {wait_time}s... (Attempt {attempt+1}/3)")
169
  time.sleep(wait_time)
170
  else:
171
  raise e
172
 
173
+ raise Exception("Failed after 3 attempts.")
174
 
175
  def __call__(self, question: str, task_id: str) -> str:
176
  print(f"\nQuestion: {question[:100]}...")
177
 
178
  file_alert = download_task_file(task_id)
179
 
180
+ system = """You are an expert AI solving benchmark questions. Think step by step.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
 
182
+ TOOLS AVAILABLE:
183
+ - SEARCH: <query> (web search)
184
+ - WIKIPEDIA: <query> (Wikipedia lookup)
185
+ - FETCH: <url> (read a webpage)
186
+ - PYTHON: ```python ... ``` (run code, always use print())
187
 
188
+ ANSWER FORMAT: When done, output exactly:
189
+ ANSWER: <your answer>
 
 
 
190
 
191
+ RULES:
192
+ - Read any downloaded file FIRST with PYTHON
193
+ - Use FETCH to read URLs mentioned in questions
194
+ - Be concise - one tool per step
195
+ - Numbers: no units unless asked, no commas
196
+ - Short phrases, no articles (a/the)
197
+ - Comma-separated if multiple items needed"""
198
 
199
+ # Keep a short rolling history instead of full conversation
200
+ history = []
201
+ initial_prompt = f"{system}\n\nQuestion: {question}{file_alert}"
 
 
 
 
 
 
202
 
203
  for iteration in range(10):
204
+ time.sleep(8)
205
+
206
+ # Build prompt from system + question + last 3 exchanges only
207
+ if not history:
208
+ prompt = initial_prompt
209
+ else:
210
+ recent = history[-3:]
211
+ exchanges = "\n\n".join([
212
+ f"Step {i+1}:\nAction: {h['action']}\nResult: {h['result'][:400]}"
213
+ for i, h in enumerate(recent)
214
+ ])
215
+ prompt = f"{system}\n\nQuestion: {question}{file_alert}\n\nPrevious steps:\n{exchanges}\n\nContinue:"
216
 
217
+ response = self.call_llm(prompt)
218
+ print(f" LLM [{iteration}]: {response[:200]}...")
219
 
220
  # Parse tool calls
221
+ fetch_match = re.search(r'FETCH:\s*(https?://\S+)', response)
222
  search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
223
  wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
 
224
  python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
225
+ answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
 
 
 
 
226
 
227
  if fetch_match:
228
  url = fetch_match.group(1).strip()
229
+ print(f" Tool: FETCH({url[:80]})")
230
+ result = web_fetch(url)
231
+ history.append({"action": f"FETCH: {url}", "result": result})
232
  elif search_match:
233
  query = search_match.group(1).strip()
234
+ print(f" Tool: SEARCH({query})")
235
+ result = web_search(query)
236
+ history.append({"action": f"SEARCH: {query}", "result": result})
237
  elif wiki_match:
238
  query = wiki_match.group(1).strip()
239
+ print(f" Tool: WIKIPEDIA({query})")
240
+ result = wikipedia_search(query)
241
+ history.append({"action": f"WIKIPEDIA: {query}", "result": result})
242
  elif python_match:
243
  code = python_match.group(1).strip()
244
+ print(f" Tool: PYTHON({code[:60]}...)")
245
+ result = run_python(code)
246
+ history.append({"action": f"PYTHON: {code[:100]}", "result": result})
247
+ elif answer_match:
248
+ answer = answer_match.group(1).strip()
249
+ print(f" Final Answer: {answer}")
250
+ return answer
251
  else:
252
+ history.append({"action": "none", "result": "No valid tool found. Use SEARCH, WIKIPEDIA, FETCH, PYTHON, or ANSWER."})
 
 
 
 
 
 
 
 
253
 
254
+ # Fallback: force answer
255
+ recent = history[-3:]
256
+ exchanges = "\n\n".join([f"Action: {h['action']}\nResult: {h['result'][:300]}" for h in recent])
257
+ fallback_prompt = f"{system}\n\nQuestion: {question}\n\nSteps taken:\n{exchanges}\n\nOut of steps. Give ONLY: ANSWER: <answer>"
258
+ last = self.call_llm(fallback_prompt)
259
  answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
260
  if answer_match:
261
  return answer_match.group(1).strip()
 
309
  print(f"Error on task {task_id}: {e}")
310
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
311
 
312
+ time.sleep(15)
313
 
314
  if not answers_payload:
315
  return "Agent did not produce any answers.", pd.DataFrame(results_log)
 
343
  gr.Markdown("# 🤖 Smart Agent — GAIA Benchmark Runner")
344
  gr.Markdown(
345
  """
346
+ **Powered by Groq (Llama 3.1 8B Instant)**
347
  **Instructions:**
348
  1. Make sure `GROQ_API_KEY` is set in your Space secrets
349
+ 2. `requirements.txt` must include: `gradio`, `requests`, `pandas`, `openpyxl`, `ddgs`, `beautifulsoup4`
350
  3. Log in with your Hugging Face account below
351
  4. Click **Run Evaluation & Submit All Answers**
352
  """
 
360
  if __name__ == "__main__":
361
  print("\n" + "=" * 30 + " Application Startup " + "=" * 30)
362
  print(f"SPACE_HOST: {os.getenv('SPACE_HOST', 'not set')}")
363
+ print(f"SPACE_ID: {os.getenv('SPACE_ID', 'not set')}")
364
  print("=" * 81 + "\n")
365
  demo.launch(debug=True, share=False)