Final_Assignment_Template

Sleeping

App Files Community

osma77 committed on Aug 2, 2025

Commit

6962438

verified ·

1 Parent(s): 0fe1872

Update app.py

Browse files

Files changed (1) hide show

app.py +202 -393

app.py CHANGED Viewed

@@ -111,7 +111,7 @@ logger = logging.getLogger(__name__)
 #             logger.error(f"Agent failed to generate response: {e}")
 #             raise
  # from langgraph.gr import StateGraph, END
-from langgraph.graph import StateGraph, END
 from langchain_core.messages import HumanMessage, AIMessage
 from langchain_openai import AzureChatOpenAI
 from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun
@@ -128,8 +128,7 @@ from langchain_core.agents import AgentAction, AgentFinish
 class BasicAgent:
     def __init__(self, model_id: Optional[str] = None, api_key: Optional[str] = None):
         """
-        Initialize BasicAgent optimized for 30%+ GAIA benchmark success.
-        Based on winning strategies from top performers like Trase Agent (35.55%).
         """
         # Initialize model
@@ -148,564 +147,374 @@ class BasicAgent:
         self.app = self.workflow.compile()
     def _initialize_tools(self):
-        """Initialize tools with winning agent optimizations."""
         @tool
-        def smart_web_search(query: str) -> str:
             """
-            Intelligent web search with query optimization and result filtering.
-            Automatically tries multiple search strategies if initial search fails.
             """
             try:
-                ddg = DuckDuckGoSearchAPIWrapper(max_results=5, region="en-us")
-                # Try exact query first
                 results = ddg.run(query)
-                # If results are poor, try alternative queries
-                if len(results) < 100 or "not found" in results.lower():
-                    # Try with quotes for exact phrases
-                    alt_query = f'"{query}"' if '"' not in query else query.replace('"', '')
-                    alt_results = ddg.run(alt_query)
-                    if len(alt_results) > len(results):
-                        results = alt_results
-                return results[:2000]
             except Exception as e:
                 return f"Search failed: {str(e)}"
-        @tool
-        def enhanced_wikipedia(query: str) -> str:
             """
-            Enhanced Wikipedia search with disambiguation and summary extraction.
             """
             try:
-                wiki = WikipediaAPIWrapper(
-                    top_k_results=3,
-                    doc_content_chars_max=1500,
-                    load_all_available_meta=True
-                )
-                # Try exact search first
                 result = wiki.run(query)
-                # If result seems incomplete, try variations
-                if len(result) < 200 and " " in query:
-                    # Try individual words
-                    words = query.split()
-                    for word in sorted(words, key=len, reverse=True):
-                        if len(word) > 3:
-                            alt_result = wiki.run(word)
-                            if len(alt_result) > len(result):
-                                result = alt_result + f"\n\n[Alternative search for: {word}]"
-                                break
                 return result
             except Exception as e:
                 return f"Wikipedia search failed: {str(e)}"
         @tool
-        def precision_calculator(code: str) -> str:
             """
-            High-precision Python calculator with enhanced mathematical libraries.
-            Includes automatic result formatting and error recovery.
             """
             try:
-                # Enhanced execution environment
                 exec_globals = {
                     '__builtins__': __builtins__,
                     'math': math,
                     'np': np,
                     'numpy': np,
                     'os': os,
-                    're': re,
-                    'round': round,
-                    'abs': abs,
-                    'min': min,
-                    'max': max,
-                    'sum': sum,
-                    'len': len,
-                    'sorted': sorted,
-                    'enumerate': enumerate,
-                    'zip': zip,
-                    'range': range,
-                    'list': list,
-                    'dict': dict,
-                    'set': set,
-                    'tuple': tuple
                 }
-                # Import additional libraries if available
                 try:
                     import pandas as pd
-                    import datetime as dt
-                    from decimal import Decimal, getcontext
-                    getcontext().prec = 50  # High precision for calculations
-                    exec_globals.update({
-                        'pd': pd,
-                        'pandas': pd,
-                        'dt': dt,
-                        'datetime': dt,
-                        'Decimal': Decimal
-                    })
                 except:
                     pass
-                # Capture both stdout and result
                 import io
                 import sys
                 old_stdout = sys.stdout
                 sys.stdout = captured_output = io.StringIO()
-                # Execute with result capture
-                try:
-                    # Try to execute and capture last expression
-                    lines = code.strip().split('\n')
-                    if lines:
-                        # Execute all but last line
-                        if len(lines) > 1:
-                            exec('\n'.join(lines[:-1]), exec_globals)
-                        # Evaluate last line if it's an expression
-                        last_line = lines[-1].strip()
-                        if last_line and not any(last_line.startswith(keyword) for keyword in ['print', 'if', 'for', 'while', 'def', 'class', 'import', 'from']):
-                            try:
-                                result = eval(last_line, exec_globals)
-                                if result is not None:
-                                    print(f"Result: {result}")
-                            except:
-                                exec(last_line, exec_globals)
-                        else:
-                            exec(last_line, exec_globals)
-                except:
-                    # Fallback: execute entire code block
-                    exec(code, exec_globals)
                 # Get output
                 sys.stdout = old_stdout
                 output = captured_output.getvalue()
-                return output if output.strip() else "Calculation completed (no output)"
             except Exception as e:
-                return f"Calculation error: {str(e)}\nTry breaking down the calculation into smaller steps."
         @tool
-        def smart_file_handler(task: str) -> str:
             """
-            Intelligent file detection and processing with automatic format recognition.
             """
             try:
-                # Scan for files
-                current_files = []
-                for item in os.listdir('.'):
-                    if os.path.isfile(item):
-                        size = os.path.getsize(item)
-                        current_files.append(f"{item} ({size} bytes)")
-                if not current_files:
-                    return "No files found in current directory. Please upload files if needed."
-                file_info = f"Available files: {current_files}\n\n"
-                file_info += f"Task: {task}\n\n"
-                # Auto-detect file types and suggest processing
-                processing_suggestions = []
-                for file_item in current_files:
-                    filename = file_item.split(' (')[0]
-                    ext = filename.split('.')[-1].lower() if '.' in filename else ''
-                    if ext in ['csv', 'tsv']:
-                        processing_suggestions.append(f"For {filename}: Use precision_calculator with pandas.read_csv('{filename}')")
-                    elif ext in ['json']:
-                        processing_suggestions.append(f"For {filename}: Use precision_calculator with json.load(open('{filename}'))")
-                    elif ext in ['txt', 'md']:
-                        processing_suggestions.append(f"For {filename}: Use precision_calculator with open('{filename}').read()")
-                    elif ext in ['jpg', 'png', 'jpeg', 'gif']:
-                        processing_suggestions.append(f"For {filename}: Use precision_calculator with PIL.Image.open('{filename}')")
-                if processing_suggestions:
-                    file_info += "Processing suggestions:\n" + "\n".join(processing_suggestions)
-                else:
-                    file_info += "Use precision_calculator to process these files with appropriate Python libraries."
-                return file_info
             except Exception as e:
-                return f"File handling error: {str(e)}"
         @tool
-        def verification_search(claim: str) -> str:
             """
-            Verification-focused search to double-check facts and calculations.
-            Uses multiple sources and cross-references information.
             """
             try:
-                # Search with verification keywords
-                verification_queries = [
-                    claim,
-                    f"verify {claim}",
-                    f"fact check {claim}",
-                    f"{claim} correct accurate"
-                ]
-                all_results = []
-                ddg = DuckDuckGoSearchAPIWrapper(max_results=3)
-                for query in verification_queries:
-                    try:
-                        result = ddg.run(query)
-                        if result and len(result) > 50:
-                            all_results.append(f"Query: {query}\nResults: {result[:500]}...\n")
-                            break  # Use first successful query
-                    except:
-                        continue
-                return "\n".join(all_results) if all_results else f"Could not verify: {claim}"
             except Exception as e:
-                return f"Verification failed: {str(e)}"
-        return [smart_web_search, enhanced_wikipedia, precision_calculator, smart_file_handler, verification_search]
     def _create_workflow(self):
-        """Create ReAct-inspired workflow for GAIA success."""
         workflow = StateGraph(dict)
-        workflow.add_node("reasoner", self._reasoning_node)
-        workflow.add_node("actor", self._action_node)
-        workflow.add_node("observer", self._observation_node)
-        workflow.add_node("finalizer", self._finalization_node)
-        workflow.set_entry_point("reasoner")
         workflow.add_conditional_edges(
-            "reasoner",
-            self._reasoning_decision,
             {
-                "act": "actor",
-                "final": "finalizer"
             }
         )
-        workflow.add_edge("actor", "observer")
         workflow.add_conditional_edges(
-            "observer",
-            self._observation_decision,
             {
-                "continue": "reasoner",
-                "finalize": "finalizer"
             }
         )
-        workflow.add_edge("finalizer", END)
         return workflow
-    def _reasoning_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """
-        Advanced reasoning node using CoT + ReAct methodology.
-        Based on successful GAIA agent strategies.
-        """
-        question = state.get("question", "")
         step_count = state.get("step_count", 0)
         max_steps = state.get("max_steps", 4)
-        execution_log = state.get("execution_log", [])
         if step_count >= max_steps:
             return {
                 **state,
-                "reasoning": "Maximum steps reached. Must provide final answer with available information.",
-                "decision": "final"
             }
-        # Build context from execution log
-        context = ""
-        if execution_log:
-            context = "\n".join([f"Step {i+1}: {log}" for i, log in enumerate(execution_log)])
-        reasoning_prompt = f"""You are an expert GAIA benchmark solver with a 35%+ success rate. Use systematic reasoning to solve this question.
-QUESTION: {question}
-EXECUTION HISTORY:
-{context if context else "No previous steps."}
-CRITICAL GAIA SUCCESS PRINCIPLES:
-1. EXACT ANSWERS ONLY: No explanations, just the precise answer (number, name, date, yes/no)
-2. STRATEGIC TOOL USE: Each tool call must have a clear purpose toward the final answer
-3. VERIFICATION: Double-check facts and calculations when possible
-4. EFFICIENCY: Level 1 should be solved in 1-3 steps maximum
 AVAILABLE TOOLS:
-- smart_web_search: Current information, recent events, specific facts
-- enhanced_wikipedia: Established facts, biographical data, historical information
-- precision_calculator: All calculations, data processing, file analysis
-- smart_file_handler: File detection and processing guidance
-- verification_search: Fact-checking and answer verification
-REASONING STRATEGY:
-1. Identify the EXACT answer format needed (What type: number, name, date, etc.?)
-2. Determine the specific information required (What facts do I need?)
-3. Choose the optimal tool for that information (Which tool is best?)
-4. Plan verification if needed (How can I double-check?)
-YOUR REASONING TASK:
-Think step-by-step about what you need to solve this question. Then decide:
-FORMAT YOUR RESPONSE AS:
-THOUGHT: [Your detailed reasoning about what's needed]
-ACTION: [tool_name]
-INPUT: [specific input for the tool]
-PURPOSE: [what you expect to learn/achieve]
-OR if you have enough information:
-THOUGHT: [Why you have enough information]
-FINAL_ANSWER: [exact answer only]
-Be extremely specific in your tool inputs. Use exact names, dates, phrases from the question."""
-        response = self.model.invoke([{"role": "user", "content": reasoning_prompt}])
         content = response.content.strip()
-        # Parse reasoning response
-        if "FINAL_ANSWER:" in content:
-            final_answer = re.search(r'FINAL_ANSWER:\s*(.+?)(?:\n|$)', content, re.IGNORECASE | re.DOTALL)
-            if final_answer:
-                answer = final_answer.group(1).strip()
-                return {
-                    **state,
-                    "reasoning": content,
-                    "final_answer": answer,
-                    "decision": "final"
-                }
-        # Parse action
-        thought_match = re.search(r'THOUGHT:\s*(.+?)(?:ACTION:|$)', content, re.IGNORECASE | re.DOTALL)
-        action_match = re.search(r'ACTION:\s*(\w+)', content, re.IGNORECASE)
-        input_match = re.search(r'INPUT:\s*(.+?)(?:PURPOSE:|$)', content, re.IGNORECASE | re.DOTALL)
-        purpose_match = re.search(r'PURPOSE:\s*(.+?)$', content, re.IGNORECASE | re.DOTALL)
-        if action_match and input_match:
-            thought = thought_match.group(1).strip() if thought_match else "No reasoning provided"
-            action = action_match.group(1).strip()
-            tool_input = input_match.group(1).strip()
-            purpose = purpose_match.group(1).strip() if purpose_match else "No purpose specified"
             return {
                 **state,
-                "reasoning": thought,
-                "current_action": action,
-                "current_input": tool_input,
-                "current_purpose": purpose,
-                "decision": "act",
-                "step_count": step_count + 1
             }
         else:
-            # Fallback - treat as final answer
             return {
                 **state,
-                "reasoning": content,
                 "final_answer": content,
-                "decision": "final"
             }
-    def _action_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Execute the planned action with enhanced error handling."""
-        action = state.get("current_action", "")
         tool_input = state.get("current_input", "")
-        purpose = state.get("current_purpose", "")
-        # Tool mapping with fuzzy matching
         tool_map = {tool.name: tool for tool in self.tools}
-        # Add common aliases
-        aliases = {
-            "search": "smart_web_search",
-            "web": "smart_web_search",
-            "google": "smart_web_search",
-            "wiki": "enhanced_wikipedia",
-            "wikipedia": "enhanced_wikipedia",
-            "calc": "precision_calculator",
-            "calculate": "precision_calculator",
-            "python": "precision_calculator",
-            "code": "precision_calculator",
-            "file": "smart_file_handler",
-            "verify": "verification_search",
-            "check": "verification_search"
         }
-        # Find the right tool
-        tool_name = action.lower()
-        if tool_name in aliases:
-            tool_name = aliases[tool_name]
         matched_tool = None
-        for real_name, tool in tool_map.items():
-            if tool_name in real_name.lower() or real_name.lower() in tool_name:
-                matched_tool = tool
                 break
         if matched_tool:
             try:
                 result = matched_tool.run(tool_input)
                 return {
                     **state,
-                    "action_result": result,
-                    "action_success": True,
-                    "last_tool": action,
-                    "last_input": tool_input
                 }
             except Exception as e:
                 return {
                     **state,
-                    "action_result": f"Tool execution failed: {str(e)}",
-                    "action_success": False,
-                    "last_tool": action,
-                    "last_input": tool_input
                 }
         else:
             available = list(tool_map.keys())
             return {
                 **state,
-                "action_result": f"Tool '{action}' not found. Available tools: {available}",
-                "action_success": False,
-                "last_tool": action,
-                "last_input": tool_input
             }
-    def _observation_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Process and analyze the action result."""
-        result = state.get("action_result", "")
-        success = state.get("action_success", False)
-        tool = state.get("last_tool", "")
-        purpose = state.get("current_purpose", "")
-        execution_log = state.get("execution_log", [])
-        # Create observation summary
-        if success:
-            observation = f"Successfully used {tool}: {result[:300]}..." if len(result) > 300 else f"Successfully used {tool}: {result}"
-        else:
-            observation = f"Failed to use {tool}: {result}"
-        execution_log.append(observation)
-        # Determine if we should continue or finalize
-        # Check if we have a clear answer in the result
-        answer_indicators = [
-            "the answer is", "result:", "final answer:", "solution:",
-            "equals", "=", "total:", "amount:", "number:", "date:", "name:"
-        ]
-        has_potential_answer = any(indicator in result.lower() for indicator in answer_indicators)
-        # Also check if result contains specific formats (numbers, dates, names)
-        has_number = re.search(r'\b\d+\b', result)
-        has_date = re.search(r'\b\d{4}\b|\b\d{1,2}/\d{1,2}/\d{2,4}\b', result)
-        if has_potential_answer or has_number or has_date:
-            decision = "finalize"
-        else:
-            decision = "continue"
-        return {
-            **state,
-            "execution_log": execution_log,
-            "last_observation": observation,
-            "decision": decision
-        }
-    def _finalization_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
-        """Extract and clean the final answer."""
-        question = state.get("question", "")
-        execution_log = state.get("execution_log", [])
-        action_result = state.get("action_result", "")
         final_answer = state.get("final_answer", "")
-        if final_answer:
-            cleaned_answer = self._clean_final_answer(final_answer)
-        else:
-            # Extract answer from the last action result
-            extraction_prompt = f"""Extract the exact answer to this question from the provided information.
-QUESTION: {question}
-INFORMATION GATHERED:
-{action_result}
-INSTRUCTIONS:
-- Provide ONLY the exact answer - no explanations, no context
-- If it's a number, provide just the number
-- If it's a name, provide just the name
-- If it's a date, provide just the date
-- If it's yes/no, provide just "Yes" or "No"
-- If you cannot determine the answer, respond with "Unable to determine"
 EXACT ANSWER:"""
-            response = self.model.invoke([{"role": "user", "content": extraction_prompt}])
-            cleaned_answer = self._clean_final_answer(response.content)
         return {
             **state,
-            "final_answer": cleaned_answer,
             "completed": True
         }
-    def _clean_final_answer(self, answer: str) -> str:
-        """Clean and format the final answer for GAIA submission."""
         if not answer:
             return "No answer found"
-        cleaned = answer.strip()
-        # Remove common prefixes and suffixes
         prefixes = [
             "the answer is", "answer:", "final answer:", "result:",
-            "exact answer:", "solution:", "response:", "output:",
-            "based on", "according to", "it appears", "it seems"
         ]
         for prefix in prefixes:
             if cleaned.lower().startswith(prefix):
                 cleaned = cleaned[len(prefix):].strip()
-                break
-        # Remove quotes
-        if (cleaned.startswith('"') and cleaned.endswith('"')) or (cleaned.startswith("'") and cleaned.endswith("'")):
             cleaned = cleaned[1:-1]
-        # Remove trailing periods for non-sentence answers
-        if len(cleaned.split()) <= 3 and cleaned.endswith('.'):
-            cleaned = cleaned[:-1]
-        # Handle special cases
-        if cleaned.lower() in ['yes', 'no', 'true', 'false']:
-            cleaned = cleaned.capitalize()
         return cleaned
-    def _reasoning_decision(self, state: Dict[str, Any]) -> str:
-        """Determine next step from reasoning."""
-        return state.get("decision", "act")
-    def _observation_decision(self, state: Dict[str, Any]) -> str:
-        """Determine next step from observation."""
-        return state.get("decision", "continue")
     def run(self, question: str, max_steps: int = 4) -> str:
         """
-        Run the agent with GAIA-winning optimizations.
-        Designed to achieve 30%+ success rate on GAIA Level 1.
         """
         initial_state = {
-            "question": question,
             "step_count": 0,
             "max_steps": max_steps,
-            "execution_log": [],
             "completed": False
         }

 #             logger.error(f"Agent failed to generate response: {e}")
 #             raise
  # from langgraph.gr import StateGraph, END
+from langgraph import StateGraph, END
 from langchain_core.messages import HumanMessage, AIMessage
 from langchain_openai import AzureChatOpenAI
 from langchain_community.tools import WikipediaQueryRun, DuckDuckGoSearchRun
 class BasicAgent:
     def __init__(self, model_id: Optional[str] = None, api_key: Optional[str] = None):
         """
+        Initialize BasicAgent optimized for GAIA benchmark success.
         """
         # Initialize model
         self.app = self.workflow.compile()
     def _initialize_tools(self):
+        """Initialize tools with GAIA-specific optimizations."""
         @tool
+        def web_search(query: str) -> str:
             """
+            Search for current information on the web. Use specific, targeted queries.
+            Best for: recent events, current data, specific facts, news.
             """
             try:
+                ddg = DuckDuckGoSearchAPIWrapper(max_results=5)
                 results = ddg.run(query)
+                return results[:1500]
             except Exception as e:
                 return f"Search failed: {str(e)}"
+        @tool
+        def wikipedia_search(query: str) -> str:
             """
+            Search Wikipedia for established facts, definitions, historical data.
+            Best for: biographical info, historical events, scientific concepts, definitions.
             """
             try:
+                wiki = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=1000)
                 result = wiki.run(query)
                 return result
             except Exception as e:
                 return f"Wikipedia search failed: {str(e)}"
         @tool
+        def python_calculator(code: str) -> str:
             """
+            Execute Python code for calculations, data processing, file operations.
+            Best for: complex math, data analysis, file processing, calculations.
+            Always include print() statements to see results.
             """
             try:
+                # Enhanced Python environment
                 exec_globals = {
                     '__builtins__': __builtins__,
                     'math': math,
                     'np': np,
                     'numpy': np,
+                    'pd': None,  # Will try to import if needed
                     'os': os,
+                    're': re
                 }
+                # Try to import common libraries
                 try:
                     import pandas as pd
+                    exec_globals['pd'] = pd
+                    exec_globals['pandas'] = pd
                 except:
                     pass
+                # Capture output
                 import io
                 import sys
                 old_stdout = sys.stdout
                 sys.stdout = captured_output = io.StringIO()
+                # Execute code
+                exec(code, exec_globals)
                 # Get output
                 sys.stdout = old_stdout
                 output = captured_output.getvalue()
+                return output if output.strip() else "Code executed successfully (no output)"
             except Exception as e:
+                return f"Python execution error: {str(e)}"
         @tool
+        def simple_math(expression: str) -> str:
             """
+            Evaluate simple mathematical expressions quickly.
+            Best for: basic arithmetic, simple calculations.
+            Examples: "2+3*4", "sqrt(16)", "sin(pi/4)"
             """
             try:
+                # Safe evaluation environment
+                allowed_names = {
+                    k: v for k, v in math.__dict__.items() if not k.startswith("__")
+                }
+                allowed_names.update({
+                    "abs": abs, "round": round, "min": min, "max": max,
+                    "sum": sum, "pow": pow, "divmod": divmod
+                })
+                result = eval(expression, {"__builtins__": {}}, allowed_names)
+                return str(result)
             except Exception as e:
+                return f"Math error: {str(e)}"
         @tool
+        def file_analyzer(task: str) -> str:
             """
+            Analyze files in the current directory.
+            Best for: examining uploaded files, extracting data from files.
             """
             try:
+                # List available files
+                files = [f for f in os.listdir('.') if os.path.isfile(f)]
+                result = f"Available files: {files}\n"
+                result += f"Task: {task}\n"
+                result += "Use python_calculator for detailed file processing."
+                return result
             except Exception as e:
+                return f"File analysis error: {str(e)}"
+        return [web_search, wikipedia_search, python_calculator, simple_math, file_analyzer]
     def _create_workflow(self):
+        """Create optimized LangGraph workflow."""
         workflow = StateGraph(dict)
+        workflow.add_node("planner", self._planner_node)
+        workflow.add_node("executor", self._executor_node)
+        workflow.add_node("validator", self._validator_node)
+        workflow.set_entry_point("planner")
         workflow.add_conditional_edges(
+            "planner",
+            self._plan_decision,
             {
+                "execute": "executor",
+                "final": "validator"
             }
         )
         workflow.add_conditional_edges(
+            "executor",
+            self._execution_decision,
             {
+                "continue": "planner",
+                "validate": "validator"
             }
         )
+        workflow.add_edge("validator", END)
         return workflow
+    def _planner_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
+        """Enhanced planning node focused on GAIA success patterns."""
+        messages = state.get("messages", [])
         step_count = state.get("step_count", 0)
         max_steps = state.get("max_steps", 4)
+        plan_history = state.get("plan_history", [])
         if step_count >= max_steps:
             return {
                 **state,
+                "final_answer": "Maximum steps reached. Providing best available answer.",
+                "action_type": "final"
             }
+        planning_prompt = f"""You are a GAIA benchmark specialist. Your task is to solve this question with MAXIMUM ACCURACY.
+QUESTION: {messages[0]['content'] if messages else 'No question provided'}
+EXECUTION HISTORY: {plan_history}
+CRITICAL SUCCESS FACTORS:
+1. PRECISION: GAIA answers must be EXACT - no approximations, no explanations
+2. STEP EFFICIENCY: Use minimal steps (typically 1-3 for Level 1)
+3. TOOL SELECTION: Choose the RIGHT tool for each specific task
 AVAILABLE TOOLS:
+- web_search: Current/recent information, news, live data
+- wikipedia_search: Established facts, biographical data, historical info
+- python_calculator: Complex calculations, data processing, file operations
+- simple_math: Quick arithmetic, basic math functions
+- file_analyzer: Examine uploaded files
+PLANNING STRATEGY:
+1. Identify the EXACT answer format needed (number, name, date, etc.)
+2. Determine the specific information required
+3. Choose the BEST tool for that information type
+4. Plan for verification if needed
+RESPONSE FORMAT:
+If you need to use a tool: "EXECUTE: [tool_name] | INPUT: [specific_input] | GOAL: [what_you_expect]"
+If you have the final answer: "FINAL: [exact_answer_only]"
+Be extremely specific in your tool inputs. Avoid vague searches."""
+        response = self.model.invoke([{"role": "system", "content": planning_prompt}])
         content = response.content.strip()
+        if content.startswith("FINAL:"):
+            answer = content.replace("FINAL:", "").strip()
             return {
                 **state,
+                "final_answer": answer,
+                "action_type": "final",
+                "step_count": step_count
             }
+        elif content.startswith("EXECUTE:"):
+            # Parse execution command
+            try:
+                parts = content.replace("EXECUTE:", "").split("|")
+                tool_name = parts[0].split()[0].strip()
+                input_part = [p for p in parts if p.strip().startswith("INPUT:")][0]
+                tool_input = input_part.replace("INPUT:", "").strip()
+                goal_part = [p for p in parts if p.strip().startswith("GOAL:")][0] if len(parts) > 2 else ""
+                goal = goal_part.replace("GOAL:", "").strip() if goal_part else ""
+                return {
+                    **state,
+                    "current_tool": tool_name,
+                    "current_input": tool_input,
+                    "current_goal": goal,
+                    "action_type": "execute",
+                    "step_count": step_count + 1
+                }
+            except Exception as e:
+                return {
+                    **state,
+                    "final_answer": f"Planning error: {str(e)}",
+                    "action_type": "final"
+                }
         else:
             return {
                 **state,
                 "final_answer": content,
+                "action_type": "final"
             }
+    def _executor_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
+        """Execute the planned action."""
+        tool_name = state.get("current_tool", "")
         tool_input = state.get("current_input", "")
+        goal = state.get("current_goal", "")
+        plan_history = state.get("plan_history", [])
+        # Find and execute tool
         tool_map = {tool.name: tool for tool in self.tools}
+        # Add flexible matching
+        tool_matches = {
+            "web_search": ["web", "search", "google", "internet"],
+            "wikipedia_search": ["wiki", "wikipedia"],
+            "python_calculator": ["python", "code", "calc", "calculate"],
+            "simple_math": ["math", "arithmetic"],
+            "file_analyzer": ["file", "analyze"]
         }
         matched_tool = None
+        for tool_real_name, aliases in tool_matches.items():
+            if tool_name.lower() in aliases or tool_name.lower() == tool_real_name.lower():
+                matched_tool = tool_map.get(tool_real_name)
                 break
+        if not matched_tool:
+            matched_tool = tool_map.get(tool_name)
         if matched_tool:
             try:
                 result = matched_tool.run(tool_input)
+                execution_record = f"STEP: Used {tool_name} with '{tool_input}' -> {result[:200]}..."
+                plan_history.append(execution_record)
                 return {
                     **state,
+                    "last_result": result,
+                    "plan_history": plan_history,
+                    "action_type": "continue"
                 }
             except Exception as e:
+                error_msg = f"Tool {tool_name} failed: {str(e)}"
+                plan_history.append(f"ERROR: {error_msg}")
                 return {
                     **state,
+                    "last_result": error_msg,
+                    "plan_history": plan_history,
+                    "action_type": "validate"
                 }
         else:
             available = list(tool_map.keys())
+            error_msg = f"Tool '{tool_name}' not found. Available: {available}"
+            plan_history.append(f"ERROR: {error_msg}")
             return {
                 **state,
+                "last_result": error_msg,
+                "plan_history": plan_history,
+                "action_type": "validate"
             }
+    def _validator_node(self, state: Dict[str, Any]) -> Dict[str, Any]:
+        """Validate and finalize the answer."""
         final_answer = state.get("final_answer", "")
+        plan_history = state.get("plan_history", [])
+        last_result = state.get("last_result", "")
+        if not final_answer and last_result:
+            # Extract answer from last result
+            validation_prompt = f"""Extract the EXACT answer from this result for the GAIA question.
+QUESTION: {state.get('messages', [{}])[0].get('content', '')}
+TOOL RESULT: {last_result}
+Provide ONLY the precise answer - no explanations, no context, just the exact answer required.
+Examples:
+- If asked for a number: "42"
+- If asked for a name: "John Smith"
+- If asked for a date: "1969"
+- If asked for a yes/no: "Yes"
 EXACT ANSWER:"""
+            response = self.model.invoke([{"role": "user", "content": validation_prompt}])
+            final_answer = response.content.strip()
+        # Clean up the answer
+        final_answer = self._clean_answer(final_answer)
         return {
             **state,
+            "final_answer": final_answer,
             "completed": True
         }
+    def _clean_answer(self, answer: str) -> str:
+        """Clean and format the final answer for GAIA."""
         if not answer:
             return "No answer found"
+        # Remove common prefixes
         prefixes = [
             "the answer is", "answer:", "final answer:", "result:",
+            "exact answer:", "solution:", "response:", "output:"
         ]
+        cleaned = answer.strip()
         for prefix in prefixes:
             if cleaned.lower().startswith(prefix):
                 cleaned = cleaned[len(prefix):].strip()
+        # Remove quotes if they wrap the entire answer
+        if cleaned.startswith('"') and cleaned.endswith('"'):
             cleaned = cleaned[1:-1]
+        if cleaned.startswith("'") and cleaned.endswith("'"):
+            cleaned = cleaned[1:-1]
         return cleaned
+    def _plan_decision(self, state: Dict[str, Any]) -> str:
+        """Decide whether to execute or finalize."""
+        return state.get("action_type", "execute")
+    def _execution_decision(self, state: Dict[str, Any]) -> str:
+        """Decide next step after execution."""
+        return state.get("action_type", "continue")
     def run(self, question: str, max_steps: int = 4) -> str:
         """
+        Run the agent with GAIA-optimized settings.
         """
         initial_state = {
+            "messages": [{"role": "user", "content": question}],
             "step_count": 0,
             "max_steps": max_steps,
+            "plan_history": [],
             "completed": False
         }