likki1715 committed on
Commit
238a105
·
verified ·
1 Parent(s): 8c2b20e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -34
app.py CHANGED
@@ -11,6 +11,29 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
12
  # --- Tool Implementations ---
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  def web_search(query: str) -> str:
15
  try:
16
  with DDGS() as ddgs:
@@ -24,7 +47,6 @@ def web_search(query: str) -> str:
24
  except Exception as e:
25
  return f"Search error: {e}"
26
 
27
-
28
  def wikipedia_search(query: str) -> str:
29
  try:
30
  search_url = "https://en.wikipedia.org/w/api.php"
@@ -49,7 +71,6 @@ def wikipedia_search(query: str) -> str:
49
  except Exception as e:
50
  return f"Wikipedia error: {e}"
51
 
52
-
53
  def run_python(code: str) -> str:
54
  import sys
55
  from io import StringIO
@@ -59,20 +80,19 @@ def run_python(code: str) -> str:
59
  exec_globals = {}
60
  exec(code, exec_globals)
61
  output = sys.stdout.getvalue()
62
- return output if output else "Code executed (no output)."
 
63
  except Exception as e:
64
- return f"Error: {e}"
65
  finally:
66
  sys.stdout = old_stdout
67
 
68
-
69
  class SmartAgent:
70
  def __init__(self):
71
  self.api_key = os.getenv("GROQ_API_KEY")
72
  if not self.api_key:
73
  raise ValueError("GROQ_API_KEY environment variable not set! Please add it to your Space secrets.")
74
 
75
- # Groq's OpenAI-compatible endpoint
76
  self.api_url = "https://api.groq.com/openai/v1/chat/completions"
77
  self.model = "llama-3.3-70b-versatile"
78
  print(f"SmartAgent initialized with Groq ({self.model})")
@@ -89,7 +109,6 @@ class SmartAgent:
89
  "max_tokens": 1024
90
  }
91
 
92
- # Exponential backoff retry mechanism
93
  wait_times = [15, 30, 60]
94
  for attempt, wait_time in enumerate(wait_times):
95
  try:
@@ -99,7 +118,7 @@ class SmartAgent:
99
  return data["choices"][0]["message"]["content"].strip()
100
  except requests.exceptions.HTTPError as e:
101
  status = response.status_code
102
- if status == 429 or status == 503:
103
  print(f"Groq API Error ({status})! Waiting {wait_time} seconds... (Attempt {attempt+1}/3)")
104
  time.sleep(wait_time)
105
  else:
@@ -107,36 +126,50 @@ class SmartAgent:
107
 
108
  raise Exception("Failed to call LLM after 3 attempts due to API limits or server errors.")
109
 
110
- def __call__(self, question: str) -> str:
111
  print(f"\nQuestion: {question[:100]}...")
 
 
 
 
 
112
 
113
- system = """You are a precise AI assistant solving benchmark questions.
114
 
115
- You can use these tools by outputting exactly:
 
116
  SEARCH: <query>
 
 
 
117
  WIKIPEDIA: <query>
118
- PYTHON: <code>
119
 
120
- After gathering enough info, give your final answer as:
 
 
 
 
 
 
 
 
121
  ANSWER: <your exact short answer>
122
 
123
- Rules for the answer:
124
  - DO NOT wrap the answer in "FINAL ANSWER: " or any other text. Output strictly "ANSWER: " followed by the exact answer string.
125
- - Numbers only (no units unless asked, no commas in numbers)
126
  - Short phrases (no articles like a/the, no abbreviations for proper nouns)
127
  - Comma-separated list if multiple items needed
128
- - Exact match required - be very precise"""
129
 
130
- conversation = f"{system}\n\nQuestion: {question}"
131
 
132
- for iteration in range(6):
133
- # Groq is fast, but we pause slightly to respect free tier limits
134
- time.sleep(2.5)
135
 
136
  response = self.call_llm(conversation)
137
- print(f" LLM [{iteration}]: {response[:200]}")
138
 
139
- # Check for final answer
140
  answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
141
  if answer_match:
142
  answer = answer_match.group(1).strip()
@@ -148,6 +181,7 @@ Rules for the answer:
148
  search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
149
  wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
150
  python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
 
151
  if not python_match:
152
  python_match = re.search(r'PYTHON:\s*(.+?)(?:\nSEARCH|\nWIKIPEDIA|\nANSWER|$)', response, re.DOTALL)
153
 
@@ -161,16 +195,15 @@ Rules for the answer:
161
  tool_result = f"Wikipedia results for '{query}':\n{wikipedia_search(query)}"
162
  elif python_match:
163
  code = python_match.group(1).strip()
164
- print(f" Tool: python({code[:50]})")
165
  tool_result = f"Python output:\n{run_python(code)}"
166
-
167
- if tool_result:
168
- conversation += f"\n\nAssistant: {response}\n\nTool Result: {tool_result}\n\nNow provide your ANSWER: <answer>"
169
  else:
170
- conversation += f"\n\nAssistant: {response}\n\nProvide your final answer as: ANSWER: <answer>"
 
 
171
 
172
- # Final attempt fallback
173
- conversation += "\n\nGive only the final answer as: ANSWER: <answer>"
174
  last = self.call_llm(conversation)
175
  answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
176
  if answer_match:
@@ -210,21 +243,23 @@ def run_and_submit_all(profile: gr.OAuthProfile | None):
210
 
211
  results_log = []
212
  answers_payload = []
 
213
  for item in questions_data:
214
  task_id = item.get("task_id")
215
  question_text = item.get("question")
216
  if not task_id or question_text is None:
217
  continue
 
218
  try:
219
- submitted_answer = agent(question_text)
 
220
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
221
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
222
  except Exception as e:
223
  print(f"Error on task {task_id}: {e}")
224
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
225
 
226
- # Pausing between questions to ensure safety against rate limits
227
- time.sleep(3)
228
 
229
  if not answers_payload:
230
  return "Agent did not produce any answers.", pd.DataFrame(results_log)
@@ -262,8 +297,9 @@ with gr.Blocks() as demo:
262
 
263
  **Instructions:**
264
  1. Make sure `GROQ_API_KEY` is set in your Space secrets
265
- 2. Log in with your Hugging Face account below
266
- 3. Click **Run Evaluation & Submit All Answers**
 
267
  """
268
  )
269
  gr.LoginButton()
 
11
 
12
  # --- Tool Implementations ---
13
 
14
def download_task_file(task_id: str) -> str:
    """Download the file attached to a task, if any, into the working directory.

    Requests ``{DEFAULT_API_URL}/files/{task_id}``. When the response status is
    200, the body is written to a local file and a notice naming that file is
    returned so the caller can append it to the agent prompt.

    The local name is taken from the ``Content-Disposition`` header when one is
    present; otherwise ``file_<first 8 chars of task_id>.tmp`` is used. Because
    the header is controlled by the remote server, only the final path
    component is kept, so the file is always written to the current working
    directory.

    Args:
        task_id: Identifier of the task whose attachment should be fetched.

    Returns:
        A ``[System Alert: ...]`` notice containing the saved filename, or an
        empty string when the status is not 200 or the download/save fails.
    """
    url = f"{DEFAULT_API_URL}/files/{task_id}"
    try:
        response = requests.get(url, timeout=15)
        if response.status_code != 200:
            return ""  # No file attached

        # Default name, used when the header is missing or unusable.
        filename = f"file_{task_id[:8]}.tmp"

        cd = response.headers.get('content-disposition')
        if cd:
            # Stop at ';' as well as '"' so that trailing header parameters
            # (e.g. `filename=a.csv; size=3`) are not captured as part of
            # an unquoted filename.
            match = re.search(r'filename="?([^";]+)"?', cd)
            if match:
                # Never trust a server-supplied path: normalise Windows
                # separators, then drop any directory components so values
                # like "../../app.py" or "/etc/x" cannot escape the cwd.
                raw_name = match.group(1).strip().replace("\\", "/")
                candidate = os.path.basename(raw_name)
                if candidate not in ("", ".", ".."):
                    filename = candidate

        # Save the file locally
        with open(filename, 'wb') as f:
            f.write(response.content)
        return f"\n[System Alert: A file required for this task was downloaded as '{filename}'. Use your PYTHON tool to read and analyze it.]\n"
    except Exception as e:
        # Deliberately broad: attachments are best-effort, and the agent
        # should still attempt the question when the download or save fails.
        print(f"File download error for {task_id}: {e}")
        return ""
36
+
37
  def web_search(query: str) -> str:
38
  try:
39
  with DDGS() as ddgs:
 
47
  except Exception as e:
48
  return f"Search error: {e}"
49
 
 
50
  def wikipedia_search(query: str) -> str:
51
  try:
52
  search_url = "https://en.wikipedia.org/w/api.php"
 
71
  except Exception as e:
72
  return f"Wikipedia error: {e}"
73
 
 
74
  def run_python(code: str) -> str:
75
  import sys
76
  from io import StringIO
 
80
  exec_globals = {}
81
  exec(code, exec_globals)
82
  output = sys.stdout.getvalue()
83
+ # Remind the agent to use print() if it forgot
84
+ return output if output else "Code executed successfully, but printed nothing. Use print() to see data."
85
  except Exception as e:
86
+ return f"Python execution error: {e}"
87
  finally:
88
  sys.stdout = old_stdout
89
 
 
90
  class SmartAgent:
91
  def __init__(self):
92
  self.api_key = os.getenv("GROQ_API_KEY")
93
  if not self.api_key:
94
  raise ValueError("GROQ_API_KEY environment variable not set! Please add it to your Space secrets.")
95
 
 
96
  self.api_url = "https://api.groq.com/openai/v1/chat/completions"
97
  self.model = "llama-3.3-70b-versatile"
98
  print(f"SmartAgent initialized with Groq ({self.model})")
 
109
  "max_tokens": 1024
110
  }
111
 
 
112
  wait_times = [15, 30, 60]
113
  for attempt, wait_time in enumerate(wait_times):
114
  try:
 
118
  return data["choices"][0]["message"]["content"].strip()
119
  except requests.exceptions.HTTPError as e:
120
  status = response.status_code
121
+ if status in [429, 503, 500]:
122
  print(f"Groq API Error ({status})! Waiting {wait_time} seconds... (Attempt {attempt+1}/3)")
123
  time.sleep(wait_time)
124
  else:
 
126
 
127
  raise Exception("Failed to call LLM after 3 attempts due to API limits or server errors.")
128
 
129
+ def __call__(self, question: str, task_id: str) -> str:
130
  print(f"\nQuestion: {question[:100]}...")
131
+
132
+ # Download file if it exists and pass the filename to the prompt
133
+ file_alert = download_task_file(task_id)
134
+
135
+ system = """You are an elite AI assistant solving benchmark questions. You must think step-by-step.
136
 
137
+ You have access to a Python environment, Wikipedia, and Web Search.
138
 
139
+ You MUST use tools by outputting exactly one of the following formats per step:
140
+ THOUGHT: <your reasoning for what to do next>
141
  SEARCH: <query>
142
+
143
+ Or:
144
+ THOUGHT: <your reasoning>
145
  WIKIPEDIA: <query>
 
146
 
147
+ Or:
148
+ THOUGHT: <your reasoning>
149
+ PYTHON:
150
+ ```python
151
+ <your python code here (make sure to use print() to see outputs)>
152
+ ```
153
+
154
+ When you are 100% sure you have the final answer, output:
155
+ THOUGHT: <your final reasoning>
156
  ANSWER: <your exact short answer>
157
 
158
+ Rules for the ANSWER:
159
  - DO NOT wrap the answer in "FINAL ANSWER: " or any other text. Output strictly "ANSWER: " followed by the exact answer string.
160
+ - Numbers only (no units unless asked, no commas in numbers, express money as numbers with decimals)
161
  - Short phrases (no articles like a/the, no abbreviations for proper nouns)
162
  - Comma-separated list if multiple items needed
163
+ - Exact match required - be very precise!"""
164
 
165
+ conversation = f"{system}\n\nQuestion: {question}{file_alert}"
166
 
167
+ for iteration in range(7):
168
+ time.sleep(2.5) # Groq rate limit safety
 
169
 
170
  response = self.call_llm(conversation)
171
+ print(f" LLM [{iteration}]: \n{response[:300]}...\n")
172
 
 
173
  answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', response, re.IGNORECASE)
174
  if answer_match:
175
  answer = answer_match.group(1).strip()
 
181
  search_match = re.search(r'SEARCH:\s*(.+?)(?:\n|$)', response)
182
  wiki_match = re.search(r'WIKIPEDIA:\s*(.+?)(?:\n|$)', response)
183
  python_match = re.search(r'PYTHON:\s*```(?:python)?\n?(.*?)```', response, re.DOTALL)
184
+
185
  if not python_match:
186
  python_match = re.search(r'PYTHON:\s*(.+?)(?:\nSEARCH|\nWIKIPEDIA|\nANSWER|$)', response, re.DOTALL)
187
 
 
195
  tool_result = f"Wikipedia results for '{query}':\n{wikipedia_search(query)}"
196
  elif python_match:
197
  code = python_match.group(1).strip()
198
+ print(f" Tool: python({code[:50]}...)")
199
  tool_result = f"Python output:\n{run_python(code)}"
 
 
 
200
  else:
201
+ tool_result = "No valid tool call detected. Please use SEARCH, WIKIPEDIA, PYTHON, or ANSWER."
202
+
203
+ conversation += f"\n\nAssistant: {response}\n\nTool Result: {tool_result}\n\nNow provide your next THOUGHT and tool, or your ANSWER."
204
 
205
+ # Fallback if it runs out of iterations
206
+ conversation += "\n\nYou are out of steps. Give only the final answer as: ANSWER: <answer>"
207
  last = self.call_llm(conversation)
208
  answer_match = re.search(r'ANSWER:\s*(.+?)(?:\n|$)', last, re.IGNORECASE)
209
  if answer_match:
 
243
 
244
  results_log = []
245
  answers_payload = []
246
+
247
  for item in questions_data:
248
  task_id = item.get("task_id")
249
  question_text = item.get("question")
250
  if not task_id or question_text is None:
251
  continue
252
+
253
  try:
254
+ # We now pass the task_id to the agent so it can download the file!
255
+ submitted_answer = agent(question_text, task_id)
256
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
257
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
258
  except Exception as e:
259
  print(f"Error on task {task_id}: {e}")
260
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"ERROR: {e}"})
261
 
262
+ time.sleep(3) # Safety pause between questions
 
263
 
264
  if not answers_payload:
265
  return "Agent did not produce any answers.", pd.DataFrame(results_log)
 
297
 
298
  **Instructions:**
299
  1. Make sure `GROQ_API_KEY` is set in your Space secrets
300
+ 2. Make sure `pandas` and `openpyxl` are in your `requirements.txt`
301
+ 3. Log in with your Hugging Face account below
302
+ 4. Click **Run Evaluation & Submit All Answers**
303
  """
304
  )
305
  gr.LoginButton()