osma77 committed on
Commit
6962438
·
verified ·
1 Parent(s): 0fe1872

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +202 -393
app.py CHANGED
@@ -111,7 +111,7 @@ logger = logging.getLogger(__name__)
111
  # logger.error(f"Agent failed to generate response: {e}")
112
  # raise
113
  # from langgraph.gr import StateGraph, END
114
- from langgraph.graph import StateGraph, END
115
  from langchain_core.messages import HumanMessage, AIMessage
116
  from langchain_openai import AzureChatOpenAI
117
  from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun
@@ -128,8 +128,7 @@ from langchain_core.agents import AgentAction, AgentFinish
128
  class BasicAgent:
129
  def __init__(self, model_id: Optional[str] = None, api_key: Optional[str] = None):
130
  """
131
- Initialize BasicAgent optimized for 30%+ GAIA benchmark success.
132
- Based on winning strategies from top performers like Trase Agent (35.55%).
133
  """
134
 
135
  # Initialize model
@@ -148,564 +147,374 @@ class BasicAgent:
148
  self.app = self.workflow.compile()
149
 
150
  def _initialize_tools(self):
151
- """Initialize tools with winning agent optimizations."""
152
 
153
  @tool
154
- def smart_web_search(query: str) -> str:
155
  """
156
- Intelligent web search with query optimization and result filtering.
157
- Automatically tries multiple search strategies if initial search fails.
158
  """
159
  try:
160
- ddg = DuckDuckGoSearchAPIWrapper(max_results=5, region="en-us")
161
-
162
- # Try exact query first
163
  results = ddg.run(query)
164
-
165
- # If results are poor, try alternative queries
166
- if len(results) < 100 or "not found" in results.lower():
167
- # Try with quotes for exact phrases
168
- alt_query = f'"{query}"' if '"' not in query else query.replace('"', '')
169
- alt_results = ddg.run(alt_query)
170
- if len(alt_results) > len(results):
171
- results = alt_results
172
-
173
- return results[:2000]
174
  except Exception as e:
175
  return f"Search failed: {str(e)}"
176
 
177
- @tool
178
- def enhanced_wikipedia(query: str) -> str:
179
  """
180
- Enhanced Wikipedia search with disambiguation and summary extraction.
 
181
  """
182
  try:
183
- wiki = WikipediaAPIWrapper(
184
- top_k_results=3,
185
- doc_content_chars_max=1500,
186
- load_all_available_meta=True
187
- )
188
-
189
- # Try exact search first
190
  result = wiki.run(query)
191
-
192
- # If result seems incomplete, try variations
193
- if len(result) < 200 and " " in query:
194
- # Try individual words
195
- words = query.split()
196
- for word in sorted(words, key=len, reverse=True):
197
- if len(word) > 3:
198
- alt_result = wiki.run(word)
199
- if len(alt_result) > len(result):
200
- result = alt_result + f"\n\n[Alternative search for: {word}]"
201
- break
202
-
203
  return result
204
  except Exception as e:
205
  return f"Wikipedia search failed: {str(e)}"
206
 
207
  @tool
208
- def precision_calculator(code: str) -> str:
209
  """
210
- High-precision Python calculator with enhanced mathematical libraries.
211
- Includes automatic result formatting and error recovery.
 
212
  """
213
  try:
214
- # Enhanced execution environment
215
  exec_globals = {
216
  '__builtins__': __builtins__,
217
  'math': math,
218
  'np': np,
219
  'numpy': np,
 
220
  'os': os,
221
- 're': re,
222
- 'round': round,
223
- 'abs': abs,
224
- 'min': min,
225
- 'max': max,
226
- 'sum': sum,
227
- 'len': len,
228
- 'sorted': sorted,
229
- 'enumerate': enumerate,
230
- 'zip': zip,
231
- 'range': range,
232
- 'list': list,
233
- 'dict': dict,
234
- 'set': set,
235
- 'tuple': tuple
236
  }
237
 
238
- # Import additional libraries if available
239
  try:
240
  import pandas as pd
241
- import datetime as dt
242
- from decimal import Decimal, getcontext
243
- getcontext().prec = 50 # High precision for calculations
244
-
245
- exec_globals.update({
246
- 'pd': pd,
247
- 'pandas': pd,
248
- 'dt': dt,
249
- 'datetime': dt,
250
- 'Decimal': Decimal
251
- })
252
  except:
253
  pass
254
 
255
- # Capture both stdout and result
256
  import io
257
  import sys
258
  old_stdout = sys.stdout
259
  sys.stdout = captured_output = io.StringIO()
260
 
261
- # Execute with result capture
262
- try:
263
- # Try to execute and capture last expression
264
- lines = code.strip().split('\n')
265
- if lines:
266
- # Execute all but last line
267
- if len(lines) > 1:
268
- exec('\n'.join(lines[:-1]), exec_globals)
269
-
270
- # Evaluate last line if it's an expression
271
- last_line = lines[-1].strip()
272
- if last_line and not any(last_line.startswith(keyword) for keyword in ['print', 'if', 'for', 'while', 'def', 'class', 'import', 'from']):
273
- try:
274
- result = eval(last_line, exec_globals)
275
- if result is not None:
276
- print(f"Result: {result}")
277
- except:
278
- exec(last_line, exec_globals)
279
- else:
280
- exec(last_line, exec_globals)
281
- except:
282
- # Fallback: execute entire code block
283
- exec(code, exec_globals)
284
 
285
  # Get output
286
  sys.stdout = old_stdout
287
  output = captured_output.getvalue()
288
 
289
- return output if output.strip() else "Calculation completed (no output)"
290
 
291
  except Exception as e:
292
- return f"Calculation error: {str(e)}\nTry breaking down the calculation into smaller steps."
293
 
294
  @tool
295
- def smart_file_handler(task: str) -> str:
296
  """
297
- Intelligent file detection and processing with automatic format recognition.
 
 
298
  """
299
  try:
300
- # Scan for files
301
- current_files = []
302
- for item in os.listdir('.'):
303
- if os.path.isfile(item):
304
- size = os.path.getsize(item)
305
- current_files.append(f"{item} ({size} bytes)")
306
-
307
- if not current_files:
308
- return "No files found in current directory. Please upload files if needed."
309
-
310
- file_info = f"Available files: {current_files}\n\n"
311
- file_info += f"Task: {task}\n\n"
312
-
313
- # Auto-detect file types and suggest processing
314
- processing_suggestions = []
315
- for file_item in current_files:
316
- filename = file_item.split(' (')[0]
317
- ext = filename.split('.')[-1].lower() if '.' in filename else ''
318
-
319
- if ext in ['csv', 'tsv']:
320
- processing_suggestions.append(f"For {filename}: Use precision_calculator with pandas.read_csv('{filename}')")
321
- elif ext in ['json']:
322
- processing_suggestions.append(f"For {filename}: Use precision_calculator with json.load(open('{filename}'))")
323
- elif ext in ['txt', 'md']:
324
- processing_suggestions.append(f"For {filename}: Use precision_calculator with open('{filename}').read()")
325
- elif ext in ['jpg', 'png', 'jpeg', 'gif']:
326
- processing_suggestions.append(f"For {filename}: Use precision_calculator with PIL.Image.open('{filename}')")
327
-
328
- if processing_suggestions:
329
- file_info += "Processing suggestions:\n" + "\n".join(processing_suggestions)
330
- else:
331
- file_info += "Use precision_calculator to process these files with appropriate Python libraries."
332
-
333
- return file_info
334
 
 
 
335
  except Exception as e:
336
- return f"File handling error: {str(e)}"
337
 
338
  @tool
339
- def verification_search(claim: str) -> str:
340
  """
341
- Verification-focused search to double-check facts and calculations.
342
- Uses multiple sources and cross-references information.
343
  """
344
  try:
345
- # Search with verification keywords
346
- verification_queries = [
347
- claim,
348
- f"verify {claim}",
349
- f"fact check {claim}",
350
- f"{claim} correct accurate"
351
- ]
352
-
353
- all_results = []
354
- ddg = DuckDuckGoSearchAPIWrapper(max_results=3)
355
 
356
- for query in verification_queries:
357
- try:
358
- result = ddg.run(query)
359
- if result and len(result) > 50:
360
- all_results.append(f"Query: {query}\nResults: {result[:500]}...\n")
361
- break # Use first successful query
362
- except:
363
- continue
364
-
365
- return "\n".join(all_results) if all_results else f"Could not verify: {claim}"
366
 
 
367
  except Exception as e:
368
- return f"Verification failed: {str(e)}"
369
 
370
- return [smart_web_search, enhanced_wikipedia, precision_calculator, smart_file_handler, verification_search]
371
 
372
  def _create_workflow(self):
373
- """Create ReAct-inspired workflow for GAIA success."""
374
  workflow = StateGraph(dict)
375
 
376
- workflow.add_node("reasoner", self._reasoning_node)
377
- workflow.add_node("actor", self._action_node)
378
- workflow.add_node("observer", self._observation_node)
379
- workflow.add_node("finalizer", self._finalization_node)
380
 
381
- workflow.set_entry_point("reasoner")
382
 
383
  workflow.add_conditional_edges(
384
- "reasoner",
385
- self._reasoning_decision,
386
  {
387
- "act": "actor",
388
- "final": "finalizer"
389
  }
390
  )
391
 
392
- workflow.add_edge("actor", "observer")
393
-
394
  workflow.add_conditional_edges(
395
- "observer",
396
- self._observation_decision,
397
  {
398
- "continue": "reasoner",
399
- "finalize": "finalizer"
400
  }
401
  )
402
 
403
- workflow.add_edge("finalizer", END)
404
 
405
  return workflow
406
 
407
- def _reasoning_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
408
- """
409
- Advanced reasoning node using CoT + ReAct methodology.
410
- Based on successful GAIA agent strategies.
411
- """
412
- question = state.get("question", "")
413
  step_count = state.get("step_count", 0)
414
  max_steps = state.get("max_steps", 4)
415
- execution_log = state.get("execution_log", [])
416
 
417
  if step_count >= max_steps:
418
  return {
419
  **state,
420
- "reasoning": "Maximum steps reached. Must provide final answer with available information.",
421
- "decision": "final"
422
  }
423
 
424
- # Build context from execution log
425
- context = ""
426
- if execution_log:
427
- context = "\n".join([f"Step {i+1}: {log}" for i, log in enumerate(execution_log)])
428
-
429
- reasoning_prompt = f"""You are an expert GAIA benchmark solver with a 35%+ success rate. Use systematic reasoning to solve this question.
430
 
431
- QUESTION: {question}
432
 
433
- EXECUTION HISTORY:
434
- {context if context else "No previous steps."}
435
 
436
- CRITICAL GAIA SUCCESS PRINCIPLES:
437
- 1. EXACT ANSWERS ONLY: No explanations, just the precise answer (number, name, date, yes/no)
438
- 2. STRATEGIC TOOL USE: Each tool call must have a clear purpose toward the final answer
439
- 3. VERIFICATION: Double-check facts and calculations when possible
440
- 4. EFFICIENCY: Level 1 should be solved in 1-3 steps maximum
441
 
442
  AVAILABLE TOOLS:
443
- - smart_web_search: Current information, recent events, specific facts
444
- - enhanced_wikipedia: Established facts, biographical data, historical information
445
- - precision_calculator: All calculations, data processing, file analysis
446
- - smart_file_handler: File detection and processing guidance
447
- - verification_search: Fact-checking and answer verification
448
-
449
- REASONING STRATEGY:
450
- 1. Identify the EXACT answer format needed (What type: number, name, date, etc.?)
451
- 2. Determine the specific information required (What facts do I need?)
452
- 3. Choose the optimal tool for that information (Which tool is best?)
453
- 4. Plan verification if needed (How can I double-check?)
454
 
455
- YOUR REASONING TASK:
456
- Think step-by-step about what you need to solve this question. Then decide:
 
 
 
457
 
458
- FORMAT YOUR RESPONSE AS:
459
- THOUGHT: [Your detailed reasoning about what's needed]
460
- ACTION: [tool_name]
461
- INPUT: [specific input for the tool]
462
- PURPOSE: [what you expect to learn/achieve]
463
 
464
- OR if you have enough information:
465
 
466
- THOUGHT: [Why you have enough information]
467
- FINAL_ANSWER: [exact answer only]
468
-
469
- Be extremely specific in your tool inputs. Use exact names, dates, phrases from the question."""
470
-
471
- response = self.model.invoke([{"role": "user", "content": reasoning_prompt}])
472
  content = response.content.strip()
473
 
474
- # Parse reasoning response
475
- if "FINAL_ANSWER:" in content:
476
- final_answer = re.search(r'FINAL_ANSWER:\s*(.+?)(?:\n|$)', content, re.IGNORECASE | re.DOTALL)
477
- if final_answer:
478
- answer = final_answer.group(1).strip()
479
- return {
480
- **state,
481
- "reasoning": content,
482
- "final_answer": answer,
483
- "decision": "final"
484
- }
485
-
486
- # Parse action
487
- thought_match = re.search(r'THOUGHT:\s*(.+?)(?:ACTION:|$)', content, re.IGNORECASE | re.DOTALL)
488
- action_match = re.search(r'ACTION:\s*(\w+)', content, re.IGNORECASE)
489
- input_match = re.search(r'INPUT:\s*(.+?)(?:PURPOSE:|$)', content, re.IGNORECASE | re.DOTALL)
490
- purpose_match = re.search(r'PURPOSE:\s*(.+?)$', content, re.IGNORECASE | re.DOTALL)
491
-
492
- if action_match and input_match:
493
- thought = thought_match.group(1).strip() if thought_match else "No reasoning provided"
494
- action = action_match.group(1).strip()
495
- tool_input = input_match.group(1).strip()
496
- purpose = purpose_match.group(1).strip() if purpose_match else "No purpose specified"
497
-
498
  return {
499
  **state,
500
- "reasoning": thought,
501
- "current_action": action,
502
- "current_input": tool_input,
503
- "current_purpose": purpose,
504
- "decision": "act",
505
- "step_count": step_count + 1
506
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
507
  else:
508
- # Fallback - treat as final answer
509
  return {
510
  **state,
511
- "reasoning": content,
512
  "final_answer": content,
513
- "decision": "final"
514
  }
515
 
516
- def _action_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
517
- """Execute the planned action with enhanced error handling."""
518
- action = state.get("current_action", "")
519
  tool_input = state.get("current_input", "")
520
- purpose = state.get("current_purpose", "")
 
521
 
522
- # Tool mapping with fuzzy matching
523
  tool_map = {tool.name: tool for tool in self.tools}
524
 
525
- # Add common aliases
526
- aliases = {
527
- "search": "smart_web_search",
528
- "web": "smart_web_search",
529
- "google": "smart_web_search",
530
- "wiki": "enhanced_wikipedia",
531
- "wikipedia": "enhanced_wikipedia",
532
- "calc": "precision_calculator",
533
- "calculate": "precision_calculator",
534
- "python": "precision_calculator",
535
- "code": "precision_calculator",
536
- "file": "smart_file_handler",
537
- "verify": "verification_search",
538
- "check": "verification_search"
539
  }
540
 
541
- # Find the right tool
542
- tool_name = action.lower()
543
- if tool_name in aliases:
544
- tool_name = aliases[tool_name]
545
-
546
  matched_tool = None
547
- for real_name, tool in tool_map.items():
548
- if tool_name in real_name.lower() or real_name.lower() in tool_name:
549
- matched_tool = tool
550
  break
551
 
 
 
 
552
  if matched_tool:
553
  try:
554
  result = matched_tool.run(tool_input)
 
 
 
555
  return {
556
  **state,
557
- "action_result": result,
558
- "action_success": True,
559
- "last_tool": action,
560
- "last_input": tool_input
561
  }
562
  except Exception as e:
 
 
563
  return {
564
  **state,
565
- "action_result": f"Tool execution failed: {str(e)}",
566
- "action_success": False,
567
- "last_tool": action,
568
- "last_input": tool_input
569
  }
570
  else:
571
  available = list(tool_map.keys())
 
 
572
  return {
573
  **state,
574
- "action_result": f"Tool '{action}' not found. Available tools: {available}",
575
- "action_success": False,
576
- "last_tool": action,
577
- "last_input": tool_input
578
  }
579
 
580
- def _observation_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
581
- """Process and analyze the action result."""
582
- result = state.get("action_result", "")
583
- success = state.get("action_success", False)
584
- tool = state.get("last_tool", "")
585
- purpose = state.get("current_purpose", "")
586
- execution_log = state.get("execution_log", [])
587
-
588
- # Create observation summary
589
- if success:
590
- observation = f"Successfully used {tool}: {result[:300]}..." if len(result) > 300 else f"Successfully used {tool}: {result}"
591
- else:
592
- observation = f"Failed to use {tool}: {result}"
593
-
594
- execution_log.append(observation)
595
-
596
- # Determine if we should continue or finalize
597
- # Check if we have a clear answer in the result
598
- answer_indicators = [
599
- "the answer is", "result:", "final answer:", "solution:",
600
- "equals", "=", "total:", "amount:", "number:", "date:", "name:"
601
- ]
602
-
603
- has_potential_answer = any(indicator in result.lower() for indicator in answer_indicators)
604
-
605
- # Also check if result contains specific formats (numbers, dates, names)
606
- has_number = re.search(r'\b\d+\b', result)
607
- has_date = re.search(r'\b\d{4}\b|\b\d{1,2}/\d{1,2}/\d{2,4}\b', result)
608
-
609
- if has_potential_answer or has_number or has_date:
610
- decision = "finalize"
611
- else:
612
- decision = "continue"
613
-
614
- return {
615
- **state,
616
- "execution_log": execution_log,
617
- "last_observation": observation,
618
- "decision": decision
619
- }
620
-
621
- def _finalization_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
622
- """Extract and clean the final answer."""
623
- question = state.get("question", "")
624
- execution_log = state.get("execution_log", [])
625
- action_result = state.get("action_result", "")
626
  final_answer = state.get("final_answer", "")
 
 
627
 
628
- if final_answer:
629
- cleaned_answer = self._clean_final_answer(final_answer)
630
- else:
631
- # Extract answer from the last action result
632
- extraction_prompt = f"""Extract the exact answer to this question from the provided information.
633
-
634
- QUESTION: {question}
635
 
636
- INFORMATION GATHERED:
637
- {action_result}
638
 
639
- INSTRUCTIONS:
640
- - Provide ONLY the exact answer - no explanations, no context
641
- - If it's a number, provide just the number
642
- - If it's a name, provide just the name
643
- - If it's a date, provide just the date
644
- - If it's yes/no, provide just "Yes" or "No"
645
- - If you cannot determine the answer, respond with "Unable to determine"
646
 
647
  EXACT ANSWER:"""
648
 
649
- response = self.model.invoke([{"role": "user", "content": extraction_prompt}])
650
- cleaned_answer = self._clean_final_answer(response.content)
 
 
 
651
 
652
  return {
653
  **state,
654
- "final_answer": cleaned_answer,
655
  "completed": True
656
  }
657
 
658
- def _clean_final_answer(self, answer: str) -> str:
659
- """Clean and format the final answer for GAIA submission."""
660
  if not answer:
661
  return "No answer found"
662
-
663
- cleaned = answer.strip()
664
-
665
- # Remove common prefixes and suffixes
666
  prefixes = [
667
  "the answer is", "answer:", "final answer:", "result:",
668
- "exact answer:", "solution:", "response:", "output:",
669
- "based on", "according to", "it appears", "it seems"
670
  ]
671
 
 
672
  for prefix in prefixes:
673
  if cleaned.lower().startswith(prefix):
674
  cleaned = cleaned[len(prefix):].strip()
675
- break
676
 
677
- # Remove quotes
678
- if (cleaned.startswith('"') and cleaned.endswith('"')) or (cleaned.startswith("'") and cleaned.endswith("'")):
679
  cleaned = cleaned[1:-1]
680
-
681
- # Remove trailing periods for non-sentence answers
682
- if len(cleaned.split()) <= 3 and cleaned.endswith('.'):
683
- cleaned = cleaned[:-1]
684
-
685
- # Handle special cases
686
- if cleaned.lower() in ['yes', 'no', 'true', 'false']:
687
- cleaned = cleaned.capitalize()
688
-
689
  return cleaned
690
 
691
- def _reasoning_decision(self, state: Dict[str, Any]) -> str:
692
- """Determine next step from reasoning."""
693
- return state.get("decision", "act")
694
 
695
- def _observation_decision(self, state: Dict[str, Any]) -> str:
696
- """Determine next step from observation."""
697
- return state.get("decision", "continue")
698
 
699
  def run(self, question: str, max_steps: int = 4) -> str:
700
  """
701
- Run the agent with GAIA-winning optimizations.
702
- Designed to achieve 30%+ success rate on GAIA Level 1.
703
  """
704
  initial_state = {
705
- "question": question,
706
  "step_count": 0,
707
  "max_steps": max_steps,
708
- "execution_log": [],
709
  "completed": False
710
  }
711
 
 
111
  # logger.error(f"Agent failed to generate response: {e}")
112
  # raise
113
  # from langgraph.gr import StateGraph, END
114
+ from langgraph import StateGraph, END
115
  from langchain_core.messages import HumanMessage, AIMessage
116
  from langchain_openai import AzureChatOpenAI
117
  from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun
 
128
  class BasicAgent:
129
  def __init__(self, model_id: Optional[str] = None, api_key: Optional[str] = None):
130
  """
131
+ Initialize BasicAgent optimized for GAIA benchmark success.
 
132
  """
133
 
134
  # Initialize model
 
147
  self.app = self.workflow.compile()
148
 
149
  def _initialize_tools(self):
150
+ """Initialize tools with GAIA-specific optimizations."""
151
 
152
  @tool
153
+ def web_search(query: str) -> str:
154
  """
155
+ Search for current information on the web. Use specific, targeted queries.
156
+ Best for: recent events, current data, specific facts, news.
157
  """
158
  try:
159
+ ddg = DuckDuckGoSearchAPIWrapper(max_results=5)
 
 
160
  results = ddg.run(query)
161
+ return results[:1500]
 
 
 
 
 
 
 
 
 
162
  except Exception as e:
163
  return f"Search failed: {str(e)}"
164
 
165
+ @tool
166
+ def wikipedia_search(query: str) -> str:
167
  """
168
+ Search Wikipedia for established facts, definitions, historical data.
169
+ Best for: biographical info, historical events, scientific concepts, definitions.
170
  """
171
  try:
172
+ wiki = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=1000)
 
 
 
 
 
 
173
  result = wiki.run(query)
 
 
 
 
 
 
 
 
 
 
 
 
174
  return result
175
  except Exception as e:
176
  return f"Wikipedia search failed: {str(e)}"
177
 
178
  @tool
179
+ def python_calculator(code: str) -> str:
180
  """
181
+ Execute Python code for calculations, data processing, file operations.
182
+ Best for: complex math, data analysis, file processing, calculations.
183
+ Always include print() statements to see results.
184
  """
185
  try:
186
+ # Enhanced Python environment
187
  exec_globals = {
188
  '__builtins__': __builtins__,
189
  'math': math,
190
  'np': np,
191
  'numpy': np,
192
+ 'pd': None, # Will try to import if needed
193
  'os': os,
194
+ 're': re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  }
196
 
197
+ # Try to import common libraries
198
  try:
199
  import pandas as pd
200
+ exec_globals['pd'] = pd
201
+ exec_globals['pandas'] = pd
 
 
 
 
 
 
 
 
 
202
  except:
203
  pass
204
 
205
+ # Capture output
206
  import io
207
  import sys
208
  old_stdout = sys.stdout
209
  sys.stdout = captured_output = io.StringIO()
210
 
211
+ # Execute code
212
+ exec(code, exec_globals)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
  # Get output
215
  sys.stdout = old_stdout
216
  output = captured_output.getvalue()
217
 
218
+ return output if output.strip() else "Code executed successfully (no output)"
219
 
220
  except Exception as e:
221
+ return f"Python execution error: {str(e)}"
222
 
223
  @tool
224
+ def simple_math(expression: str) -> str:
225
  """
226
+ Evaluate simple mathematical expressions quickly.
227
+ Best for: basic arithmetic, simple calculations.
228
+ Examples: "2+3*4", "sqrt(16)", "sin(pi/4)"
229
  """
230
  try:
231
+ # Safe evaluation environment
232
+ allowed_names = {
233
+ k: v for k, v in math.__dict__.items() if not k.startswith("__")
234
+ }
235
+ allowed_names.update({
236
+ "abs": abs, "round": round, "min": min, "max": max,
237
+ "sum": sum, "pow": pow, "divmod": divmod
238
+ })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
 
240
+ result = eval(expression, {"__builtins__": {}}, allowed_names)
241
+ return str(result)
242
  except Exception as e:
243
+ return f"Math error: {str(e)}"
244
 
245
  @tool
246
+ def file_analyzer(task: str) -> str:
247
  """
248
+ Analyze files in the current directory.
249
+ Best for: examining uploaded files, extracting data from files.
250
  """
251
  try:
252
+ # List available files
253
+ files = [f for f in os.listdir('.') if os.path.isfile(f)]
 
 
 
 
 
 
 
 
254
 
255
+ result = f"Available files: {files}\n"
256
+ result += f"Task: {task}\n"
257
+ result += "Use python_calculator for detailed file processing."
 
 
 
 
 
 
 
258
 
259
+ return result
260
  except Exception as e:
261
+ return f"File analysis error: {str(e)}"
262
 
263
+ return [web_search, wikipedia_search, python_calculator, simple_math, file_analyzer]
264
 
265
  def _create_workflow(self):
266
+ """Create optimized LangGraph workflow."""
267
  workflow = StateGraph(dict)
268
 
269
+ workflow.add_node("planner", self._planner_node)
270
+ workflow.add_node("executor", self._executor_node)
271
+ workflow.add_node("validator", self._validator_node)
 
272
 
273
+ workflow.set_entry_point("planner")
274
 
275
  workflow.add_conditional_edges(
276
+ "planner",
277
+ self._plan_decision,
278
  {
279
+ "execute": "executor",
280
+ "final": "validator"
281
  }
282
  )
283
 
 
 
284
  workflow.add_conditional_edges(
285
+ "executor",
286
+ self._execution_decision,
287
  {
288
+ "continue": "planner",
289
+ "validate": "validator"
290
  }
291
  )
292
 
293
+ workflow.add_edge("validator", END)
294
 
295
  return workflow
296
 
297
+ def _planner_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
298
+ """Enhanced planning node focused on GAIA success patterns."""
299
+ messages = state.get("messages", [])
 
 
 
300
  step_count = state.get("step_count", 0)
301
  max_steps = state.get("max_steps", 4)
302
+ plan_history = state.get("plan_history", [])
303
 
304
  if step_count >= max_steps:
305
  return {
306
  **state,
307
+ "final_answer": "Maximum steps reached. Providing best available answer.",
308
+ "action_type": "final"
309
  }
310
 
311
+ planning_prompt = f"""You are a GAIA benchmark specialist. Your task is to solve this question with MAXIMUM ACCURACY.
 
 
 
 
 
312
 
313
+ QUESTION: {messages[0]['content'] if messages else 'No question provided'}
314
 
315
+ EXECUTION HISTORY: {plan_history}
 
316
 
317
+ CRITICAL SUCCESS FACTORS:
318
+ 1. PRECISION: GAIA answers must be EXACT - no approximations, no explanations
319
+ 2. STEP EFFICIENCY: Use minimal steps (typically 1-3 for Level 1)
320
+ 3. TOOL SELECTION: Choose the RIGHT tool for each specific task
 
321
 
322
  AVAILABLE TOOLS:
323
+ - web_search: Current/recent information, news, live data
324
+ - wikipedia_search: Established facts, biographical data, historical info
325
+ - python_calculator: Complex calculations, data processing, file operations
326
+ - simple_math: Quick arithmetic, basic math functions
327
+ - file_analyzer: Examine uploaded files
 
 
 
 
 
 
328
 
329
+ PLANNING STRATEGY:
330
+ 1. Identify the EXACT answer format needed (number, name, date, etc.)
331
+ 2. Determine the specific information required
332
+ 3. Choose the BEST tool for that information type
333
+ 4. Plan for verification if needed
334
 
335
+ RESPONSE FORMAT:
336
+ If you need to use a tool: "EXECUTE: [tool_name] | INPUT: [specific_input] | GOAL: [what_you_expect]"
337
+ If you have the final answer: "FINAL: [exact_answer_only]"
 
 
338
 
339
+ Be extremely specific in your tool inputs. Avoid vague searches."""
340
 
341
+ response = self.model.invoke([{"role": "system", "content": planning_prompt}])
 
 
 
 
 
342
  content = response.content.strip()
343
 
344
+ if content.startswith("FINAL:"):
345
+ answer = content.replace("FINAL:", "").strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  return {
347
  **state,
348
+ "final_answer": answer,
349
+ "action_type": "final",
350
+ "step_count": step_count
 
 
 
351
  }
352
+ elif content.startswith("EXECUTE:"):
353
+ # Parse execution command
354
+ try:
355
+ parts = content.replace("EXECUTE:", "").split("|")
356
+ tool_name = parts[0].split()[0].strip()
357
+ input_part = [p for p in parts if p.strip().startswith("INPUT:")][0]
358
+ tool_input = input_part.replace("INPUT:", "").strip()
359
+ goal_part = [p for p in parts if p.strip().startswith("GOAL:")][0] if len(parts) > 2 else ""
360
+ goal = goal_part.replace("GOAL:", "").strip() if goal_part else ""
361
+
362
+ return {
363
+ **state,
364
+ "current_tool": tool_name,
365
+ "current_input": tool_input,
366
+ "current_goal": goal,
367
+ "action_type": "execute",
368
+ "step_count": step_count + 1
369
+ }
370
+ except Exception as e:
371
+ return {
372
+ **state,
373
+ "final_answer": f"Planning error: {str(e)}",
374
+ "action_type": "final"
375
+ }
376
  else:
 
377
  return {
378
  **state,
 
379
  "final_answer": content,
380
+ "action_type": "final"
381
  }
382
 
383
+ def _executor_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
384
+ """Execute the planned action."""
385
+ tool_name = state.get("current_tool", "")
386
  tool_input = state.get("current_input", "")
387
+ goal = state.get("current_goal", "")
388
+ plan_history = state.get("plan_history", [])
389
 
390
+ # Find and execute tool
391
  tool_map = {tool.name: tool for tool in self.tools}
392
 
393
+ # Add flexible matching
394
+ tool_matches = {
395
+ "web_search": ["web", "search", "google", "internet"],
396
+ "wikipedia_search": ["wiki", "wikipedia"],
397
+ "python_calculator": ["python", "code", "calc", "calculate"],
398
+ "simple_math": ["math", "arithmetic"],
399
+ "file_analyzer": ["file", "analyze"]
 
 
 
 
 
 
 
400
  }
401
 
 
 
 
 
 
402
  matched_tool = None
403
+ for tool_real_name, aliases in tool_matches.items():
404
+ if tool_name.lower() in aliases or tool_name.lower() == tool_real_name.lower():
405
+ matched_tool = tool_map.get(tool_real_name)
406
  break
407
 
408
+ if not matched_tool:
409
+ matched_tool = tool_map.get(tool_name)
410
+
411
  if matched_tool:
412
  try:
413
  result = matched_tool.run(tool_input)
414
+ execution_record = f"STEP: Used {tool_name} with '{tool_input}' -> {result[:200]}..."
415
+ plan_history.append(execution_record)
416
+
417
  return {
418
  **state,
419
+ "last_result": result,
420
+ "plan_history": plan_history,
421
+ "action_type": "continue"
 
422
  }
423
  except Exception as e:
424
+ error_msg = f"Tool {tool_name} failed: {str(e)}"
425
+ plan_history.append(f"ERROR: {error_msg}")
426
  return {
427
  **state,
428
+ "last_result": error_msg,
429
+ "plan_history": plan_history,
430
+ "action_type": "validate"
 
431
  }
432
  else:
433
  available = list(tool_map.keys())
434
+ error_msg = f"Tool '{tool_name}' not found. Available: {available}"
435
+ plan_history.append(f"ERROR: {error_msg}")
436
  return {
437
  **state,
438
+ "last_result": error_msg,
439
+ "plan_history": plan_history,
440
+ "action_type": "validate"
 
441
  }
442
 
443
+ def _validator_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
444
+ """Validate and finalize the answer."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  final_answer = state.get("final_answer", "")
446
+ plan_history = state.get("plan_history", [])
447
+ last_result = state.get("last_result", "")
448
 
449
+ if not final_answer and last_result:
450
+ # Extract answer from last result
451
+ validation_prompt = f"""Extract the EXACT answer from this result for the GAIA question.
 
 
 
 
452
 
453
+ QUESTION: {state.get('messages', [{}])[0].get('content', '')}
454
+ TOOL RESULT: {last_result}
455
 
456
+ Provide ONLY the precise answer - no explanations, no context, just the exact answer required.
457
+ Examples:
458
+ - If asked for a number: "42"
459
+ - If asked for a name: "John Smith"
460
+ - If asked for a date: "1969"
461
+ - If asked for a yes/no: "Yes"
 
462
 
463
  EXACT ANSWER:"""
464
 
465
+ response = self.model.invoke([{"role": "user", "content": validation_prompt}])
466
+ final_answer = response.content.strip()
467
+
468
+ # Clean up the answer
469
+ final_answer = self._clean_answer(final_answer)
470
 
471
  return {
472
  **state,
473
+ "final_answer": final_answer,
474
  "completed": True
475
  }
476
 
477
+ def _clean_answer(self, answer: str) -> str:
478
+ """Clean and format the final answer for GAIA."""
479
  if not answer:
480
  return "No answer found"
481
+
482
+ # Remove common prefixes
 
 
483
  prefixes = [
484
  "the answer is", "answer:", "final answer:", "result:",
485
+ "exact answer:", "solution:", "response:", "output:"
 
486
  ]
487
 
488
+ cleaned = answer.strip()
489
  for prefix in prefixes:
490
  if cleaned.lower().startswith(prefix):
491
  cleaned = cleaned[len(prefix):].strip()
 
492
 
493
+ # Remove quotes if they wrap the entire answer
494
+ if cleaned.startswith('"') and cleaned.endswith('"'):
495
  cleaned = cleaned[1:-1]
496
+ if cleaned.startswith("'") and cleaned.endswith("'"):
497
+ cleaned = cleaned[1:-1]
498
+
 
 
 
 
 
 
499
  return cleaned
500
 
501
+ def _plan_decision(self, state: Dict[str, Any]) -> str:
502
+ """Decide whether to execute or finalize."""
503
+ return state.get("action_type", "execute")
504
 
505
+ def _execution_decision(self, state: Dict[str, Any]) -> str:
506
+ """Decide next step after execution."""
507
+ return state.get("action_type", "continue")
508
 
509
  def run(self, question: str, max_steps: int = 4) -> str:
510
  """
511
+ Run the agent with GAIA-optimized settings.
 
512
  """
513
  initial_state = {
514
+ "messages": [{"role": "user", "content": question}],
515
  "step_count": 0,
516
  "max_steps": max_steps,
517
+ "plan_history": [],
518
  "completed": False
519
  }
520