Files changed (6) hide show
  1. agent.py +144 -0
  2. api_client.py +42 -0
  3. app.py +91 -196
  4. prompts.py +0 -0
  5. requirements.txt +11 -2
  6. tools.py +117 -0
agent.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # agent.py
2
+ import os
3
+ from dotenv import load_dotenv
4
+ from langchain_openai import ChatOpenAI
5
+ from langchain_core.messages import SystemMessage, HumanMessage
6
+ from langgraph.graph import StateGraph, START, END
7
+ from langgraph.prebuilt import ToolNode, tools_condition
8
+
9
+ from tools import TOOLS
10
+ from typing import TypedDict, Annotated, List
11
+ from langchain_core.messages import BaseMessage
12
+ import operator
13
+
14
+ load_dotenv()
15
+
16
# ── 1. State Definition ─────────────────────────────────────────────────────

class AgentState(TypedDict):
    """
    Shared state that flows through the entire graph.
    Every node can read from it and write to it.

    - messages: The conversation history (the LLM sees all of this).
      The operator.add reducer means new messages are APPENDED, not replaced.
    - task_id: The GAIA question ID (needed to download attached files).
    """
    # Annotated[..., operator.add] tells LangGraph to merge node outputs by
    # list concatenation instead of overwriting.
    messages: Annotated[List[BaseMessage], operator.add]
    # GAIA task ID; tools that download attachments need it.
    task_id: str
29
+
30
# ── 2. System Prompt ────────────────────────────────────────────────────────

# NOTE: GAIA scoring is exact-match, so the rules forbid any framing text
# ("The answer is ...") around the answer itself.
SYSTEM_PROMPT = """You are a precise research assistant solving benchmark questions.

RULES:
- You MUST use the available tools to find accurate information. Do NOT guess.
- After finding the answer, respond with ONLY the answer itself.
- Do NOT include phrases like "The answer is", "Based on my research", or "FINAL ANSWER".
- Do NOT include units unless they are explicitly part of the answer.
- For numbers, give the exact number (e.g., "42" not "approximately 42").
- For names, give the full correct name with correct spelling.
- For dates, match the format implied by the question (e.g., "1969" or "July 20, 1969").

Available tools: web_search, wikipedia_search, calculate, read_excel_file.
Choose the right tool for the question type.
"""
46
+
47
# ── 3. LLM with Tools Bound ─────────────────────────────────────────────────

# Single shared LLM instance, created once at import time.
llm = ChatOpenAI(
    model="gpt-4o-mini",  # Affordable and capable; swap to gpt-4o for better results
    temperature=0,  # Zero temperature = deterministic, no creative hallucinations
    api_key=os.getenv("OPENAI_API_KEY")  # NOTE(review): None if unset — fails on first call, not here
)

# Binding tools to the LLM tells it what tools are available and how to call them
llm_with_tools = llm.bind_tools(TOOLS)
57
+
58
# ── 4. Node Definitions ─────────────────────────────────────────────────────

def agent_node(state: AgentState) -> dict:
    """
    Reasoning step: let the LLM inspect the conversation so far and either
    request a tool call or produce the final answer.

    The system prompt is prepended on every turn so the instructions are
    always in context; the reply is merged into state["messages"] by the
    operator.add reducer.
    """
    prompt_and_history = [SystemMessage(content=SYSTEM_PROMPT), *state["messages"]]

    # The bound LLM may emit tool_calls, which tools_condition routes on.
    ai_reply = llm_with_tools.invoke(prompt_and_history)

    return {"messages": [ai_reply]}
73
+
74
+
75
# ToolNode is a pre-built LangGraph node that automatically handles tool calls.
# It reads the tool_calls from the last AI message, runs the tools, and returns
# the results (each becomes a message appended to the shared state).
tool_node = ToolNode(tools=TOOLS)

# ── 5. Routing Logic ────────────────────────────────────────────────────────

# tools_condition is a pre-built LangGraph function that checks if the last
# AI message contains tool calls. If yes -> go to "tools"; if no -> go to END.
# This is the conditional edge that creates the ReAct loop.
84
+
85
# ── 6. Build the Graph ──────────────────────────────────────────────────────

def build_agent():
    """Wire up and compile the two-node ReAct graph (agent <-> tools)."""
    builder = StateGraph(AgentState)

    builder.add_node("agent", agent_node)
    builder.add_node("tools", tool_node)

    # Execution always begins with the reasoning node.
    builder.add_edge(START, "agent")

    # After each LLM turn, tools_condition routes to "tools" when the last
    # AI message carries tool calls, otherwise to "__end__".
    builder.add_conditional_edges("agent", tools_condition)

    # Tool results always flow back into the LLM for the next reasoning turn.
    builder.add_edge("tools", "agent")

    # Compile into a runnable graph.
    return builder.compile()
111
+
112
+
113
# Create the agent (call this once at startup); the compiled graph is reused
# for every question.
agent = build_agent()
115
+
116
+
117
# ── 7. Run Function ─────────────────────────────────────────────────────────

def run_agent(question: str, task_id: str = "") -> str:
    """
    Runs the agent on a single GAIA question.

    Args:
        question: The question text from the GAIA API.
        task_id: The task ID (needed if a file is attached). When provided it
            is surfaced to the LLM so tools like read_excel_file can be
            called with the correct ID.

    Returns:
        The agent's answer as a string, or an "Agent error: ..." string if
        the graph raised (e.g. hit the recursion limit).
    """
    # BUGFIX: task_id previously sat unused in the state — the LLM never saw
    # it, so it could not call read_excel_file(task_id) for questions with an
    # attached file. Surface it in the user message instead.
    prompt = question
    if task_id:
        prompt = f"{question}\n\n(Task ID for any attached file: {task_id})"

    initial_state = {
        "messages": [HumanMessage(content=prompt)],
        "task_id": task_id,
    }

    # recursion_limit prevents infinite tool loops (LangGraph default is 25)
    config = {"recursion_limit": 15}

    try:
        final_state = agent.invoke(initial_state, config=config)
        # The last message in the state is the agent's final answer
        return final_state["messages"][-1].content.strip()
    except Exception as e:
        # Boundary handler: one bad question must not abort the whole run.
        return f"Agent error: {str(e)}"
api_client.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # api_client.py
2
+ import requests
3
+
4
+ SCORING_API = "https://agents-course-unit4-scoring.hf.space"
5
+
6
def get_all_questions() -> list[dict]:
    """
    Fetch every GAIA Level 1 question from the scoring API.

    Each returned dict carries:
        task_id: unique identifier for the question.
        question: the question text.
        file_name: attached filename, or an empty string when none.

    Raises:
        Exception: when the API answers with a non-200 status code.
    """
    response = requests.get(f"{SCORING_API}/questions", timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch questions: {response.status_code}")
    return response.json()
20
+
21
def submit_answers(username: str, agent_code_url: str, answers: list[dict]) -> dict:
    """
    Send the collected answers to the GAIA scoring endpoint.

    Args:
        username: HuggingFace username the score is recorded under.
        agent_code_url: Public URL of the Space's code tree (kept public so
            others can inspect the agent).
        answers: Items shaped like {"task_id": "...", "submitted_answer": "..."}.

    Returns:
        The API's scoring response (score and leaderboard position).

    Raises:
        Exception: when the API answers with a non-200 status code.
    """
    body = {
        "username": username,
        "agent_code": agent_code_url,
        "answers": answers,
    }
    response = requests.post(f"{SCORING_API}/submit", json=body, timeout=60)
    if response.status_code != 200:
        raise Exception(f"Submission failed: {response.status_code} β€” {response.text}")
    return response.json()
app.py CHANGED
@@ -1,196 +1,91 @@
1
- import os
2
- import gradio as gr
3
- import requests
4
- import inspect
5
- import pandas as pd
6
-
7
- # (Keep Constants as is)
8
- # --- Constants ---
9
- DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
-
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
- def run_and_submit_all( profile: gr.OAuthProfile | None):
23
- """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
- and displays the results.
26
- """
27
- # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
-
30
- if profile:
31
- username= f"{profile.username}"
32
- print(f"User logged in: {username}")
33
- else:
34
- print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
36
-
37
- api_url = DEFAULT_API_URL
38
- questions_url = f"{api_url}/questions"
39
- submit_url = f"{api_url}/submit"
40
-
41
- # 1. Instantiate Agent ( modify this part to create your agent)
42
- try:
43
- agent = BasicAgent()
44
- except Exception as e:
45
- print(f"Error instantiating agent: {e}")
46
- return f"Error initializing agent: {e}", None
47
- # In the case of an app running as a hugging Face space, this link points toward your codebase ( usefull for others so please keep it public)
48
- agent_code = f"https://huggingface.co/spaces/{space_id}/tree/main"
49
- print(agent_code)
50
-
51
- # 2. Fetch Questions
52
- print(f"Fetching questions from: {questions_url}")
53
- try:
54
- response = requests.get(questions_url, timeout=15)
55
- response.raise_for_status()
56
- questions_data = response.json()
57
- if not questions_data:
58
- print("Fetched questions list is empty.")
59
- return "Fetched questions list is empty or invalid format.", None
60
- print(f"Fetched {len(questions_data)} questions.")
61
- except requests.exceptions.RequestException as e:
62
- print(f"Error fetching questions: {e}")
63
- return f"Error fetching questions: {e}", None
64
- except requests.exceptions.JSONDecodeError as e:
65
- print(f"Error decoding JSON response from questions endpoint: {e}")
66
- print(f"Response text: {response.text[:500]}")
67
- return f"Error decoding server response for questions: {e}", None
68
- except Exception as e:
69
- print(f"An unexpected error occurred fetching questions: {e}")
70
- return f"An unexpected error occurred fetching questions: {e}", None
71
-
72
- # 3. Run your Agent
73
- results_log = []
74
- answers_payload = []
75
- print(f"Running agent on {len(questions_data)} questions...")
76
- for item in questions_data:
77
- task_id = item.get("task_id")
78
- question_text = item.get("question")
79
- if not task_id or question_text is None:
80
- print(f"Skipping item with missing task_id or question: {item}")
81
- continue
82
- try:
83
- submitted_answer = agent(question_text)
84
- answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
85
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
86
- except Exception as e:
87
- print(f"Error running agent on task {task_id}: {e}")
88
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
89
-
90
- if not answers_payload:
91
- print("Agent did not produce any answers to submit.")
92
- return "Agent did not produce any answers to submit.", pd.DataFrame(results_log)
93
-
94
- # 4. Prepare Submission
95
- submission_data = {"username": username.strip(), "agent_code": agent_code, "answers": answers_payload}
96
- status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
- print(status_update)
98
-
99
- # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
- try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
- final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
- )
112
- print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
- except requests.exceptions.HTTPError as e:
116
- error_detail = f"Server responded with status {e.response.status_code}."
117
- try:
118
- error_json = e.response.json()
119
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
- error_detail += f" Response: {e.response.text[:500]}"
122
- status_message = f"Submission Failed: {error_detail}"
123
- print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
- except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
- except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
- except Exception as e:
137
- status_message = f"An unexpected error occurred during submission: {e}"
138
- print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
141
-
142
-
143
- # --- Build Gradio Interface using Blocks ---
144
- with gr.Blocks() as demo:
145
- gr.Markdown("# Basic Agent Evaluation Runner")
146
- gr.Markdown(
147
- """
148
- **Instructions:**
149
-
150
- 1. Please clone this space, then modify the code to define your agent's logic, the tools, the necessary packages, etc ...
151
- 2. Log in to your Hugging Face account using the button below. This uses your HF username for submission.
152
- 3. Click 'Run Evaluation & Submit All Answers' to fetch questions, run your agent, submit answers, and see the score.
153
-
154
- ---
155
- **Disclaimers:**
156
- Once clicking on the "submit button, it can take quite some time ( this is the time for the agent to go through all the questions).
157
- This space provides a basic setup and is intentionally sub-optimal to encourage you to develop your own, more robust solution. For instance for the delay process of the submit button, a solution could be to cache the answers and submit in a seperate action or even to answer the questions in async.
158
- """
159
- )
160
-
161
- gr.LoginButton()
162
-
163
- run_button = gr.Button("Run Evaluation & Submit All Answers")
164
-
165
- status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
166
- # Removed max_rows=10 from DataFrame constructor
167
- results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
168
-
169
- run_button.click(
170
- fn=run_and_submit_all,
171
- outputs=[status_output, results_table]
172
- )
173
-
174
- if __name__ == "__main__":
175
- print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
- space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
-
180
- if space_host_startup:
181
- print(f"βœ… SPACE_HOST found: {space_host_startup}")
182
- print(f" Runtime URL should be: https://{space_host_startup}.hf.space")
183
- else:
184
- print("ℹ️ SPACE_HOST environment variable not found (running locally?).")
185
-
186
- if space_id_startup: # Print repo URLs if SPACE_ID is found
187
- print(f"βœ… SPACE_ID found: {space_id_startup}")
188
- print(f" Repo URL: https://huggingface.co/spaces/{space_id_startup}")
189
- print(f" Repo Tree URL: https://huggingface.co/spaces/{space_id_startup}/tree/main")
190
- else:
191
- print("ℹ️ SPACE_ID environment variable not found (running locally?). Repo URL cannot be determined.")
192
-
193
- print("-"*(60 + len(" App Starting ")) + "\n")
194
-
195
- print("Launching Gradio Interface for Basic Agent Evaluation...")
196
- demo.launch(debug=True, share=False)
 
1
+ # app.py
2
+ import gradio as gr
3
+ import pandas as pd
4
+ from agent import run_agent
5
+ from api_client import get_all_questions, submit_answers
6
+
7
def run_evaluation(profile: gr.OAuthProfile | None):
    """
    The main function triggered when the user clicks "Run Evaluation".
    Fetches all questions, runs the agent on each, and submits answers.

    Args:
        profile: The OAuth profile Gradio injects, or None when logged out.

    Returns:
        (status_message, results_dataframe) for the two UI outputs.
    """
    if profile is None:
        return "Please log in with your HuggingFace account first.", None

    username = profile.username
    # NOTE(review): assumes the Space is named "gaia-agent"; confirm against
    # the actual Space ID if it was cloned under another name.
    agent_code_url = f"https://huggingface.co/spaces/{username}/gaia-agent/tree/main"

    # Step 1: Fetch questions
    try:
        questions = get_all_questions()
    except Exception as e:
        return f"Failed to fetch questions: {e}", None

    def _truncate(text: str, limit: int) -> str:
        # Only append an ellipsis when something was actually cut off.
        return text if len(text) <= limit else text[:limit] + "..."

    # Step 2: Run the agent on each question
    answers = []      # Payload for the scoring API
    results_log = []  # Rows for the UI table

    for q in questions:
        task_id = q["task_id"]
        question_text = q["question"]

        print(f"\n{'='*60}")
        print(f"Question: {_truncate(question_text, 100)}")

        # run_agent catches its own exceptions and returns an error string,
        # so one bad question cannot abort the whole evaluation.
        answer = run_agent(question=question_text, task_id=task_id)

        print(f"Answer: {answer}")

        answers.append({
            "task_id": task_id,
            "submitted_answer": answer
        })

        results_log.append({
            "Task ID": _truncate(task_id, 8),
            "Question (truncated)": _truncate(question_text, 80),
            "Agent Answer": answer
        })

    # Step 3: Submit answers
    try:
        result = submit_answers(username, agent_code_url, answers)
        score = result.get("score", "N/A")
        correct = result.get("correct_count", "N/A")
        total = result.get("total_attempted", len(answers))

        # BUGFIX: the API reports score on a 0-100 scale, so the previous
        # f"{score:.1%}" re-multiplied by 100 — and raised ValueError on the
        # "N/A" string fallback, which then masqueraded as a submission
        # error. Format as a plain value with a literal percent sign.
        summary = f"✅ Submitted! Score: {score}% ({correct}/{total} correct)"
    except Exception as e:
        summary = f"⚠️ Submission error: {e}"

    # Return summary text and a dataframe for display
    df = pd.DataFrame(results_log)
    return summary, df
65
+
66
+
67
# ── Build the Gradio Interface ───────────────────────────────────────────────

with gr.Blocks() as demo:
    gr.Markdown("# GAIA Level 1 Agent β€” HuggingFace Agents Course")
    gr.Markdown(
        "Log in with your HuggingFace account, then click **Run Evaluation** "
        "to run the agent on all 20 GAIA Level 1 questions and submit your score."
    )

    # HuggingFace OAuth login button; it supplies the gr.OAuthProfile that
    # Gradio injects into run_evaluation.
    login_btn = gr.LoginButton()

    run_btn = gr.Button("Run Evaluation", variant="primary")

    status_output = gr.Textbox(label="Status", interactive=False)
    results_table = gr.Dataframe(label="Results", interactive=False)

    run_btn.click(
        fn=run_evaluation,
        inputs=[],  # profile is injected automatically by Gradio
        outputs=[status_output, results_table]
    )

if __name__ == "__main__":
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
prompts.py ADDED
File without changes
requirements.txt CHANGED
@@ -1,2 +1,11 @@
1
- gradio
2
- requests
 
 
 
 
 
 
 
 
 
 
1
+ langgraph>=0.2.0
2
+ langchain>=0.3.0
3
+ langchain-openai>=0.2.0
4
+ langchain-community>=0.3.0
5
+ tavily-python>=0.3.0
6
+ gradio>=5.0.0
7
+ requests>=2.31.0
8
+ pandas>=2.0.0
9
+ openpyxl>=3.1.0
10
+ duckduckgo-search>=6.0.0
11
+ python-dotenv>=1.0.0
tools.py ADDED
@@ -0,0 +1,117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # tools.py
2
+ import os
3
+ import requests
4
+ import pandas as pd
5
+ from langchain_core.tools import tool
6
+ from langchain_community.tools import DuckDuckGoSearchRun
7
+
8
# ── Tool 1: Web Search ──────────────────────────────────────────────────────

@tool
def web_search(query: str) -> str:
    """
    Search the web for current information.
    Use this for facts, recent events, or anything not in your training data.

    Args:
        query: The search query string.
    Returns:
        A string containing the top search results.
    """
    # Try Tavily first (better quality), fall back to DuckDuckGo
    tavily_key = os.getenv("TAVILY_API_KEY")
    if tavily_key:
        # Imported lazily so the dependency is only needed when a key is set.
        from tavily import TavilyClient
        client = TavilyClient(api_key=tavily_key)
        results = client.search(query=query, max_results=3)
        # Format results into a readable string
        formatted = "\n\n".join(
            f"Source: {r['url']}\n{r['content']}"
            for r in results.get("results", [])
        )
        # ROBUSTNESS: an empty result list used to return "", which gives the
        # LLM nothing to react to; return an explicit message instead.
        return formatted or f"No results found for: {query}"
    else:
        # DuckDuckGo fallback (no API key required)
        search = DuckDuckGoSearchRun()
        return search.run(query)
36
+
37
+
38
# ── Tool 2: Wikipedia Lookup ────────────────────────────────────────────────

@tool
def wikipedia_search(query: str) -> str:
    """
    Look up factual information on Wikipedia.
    Prefer this over web_search for well-established facts, historical events,
    or definitions of concepts.

    Args:
        query: The topic or person to search for.
    Returns:
        A summary from Wikipedia.
    """
    # Imported lazily to keep module import light.
    from langchain_community.utilities import WikipediaAPIWrapper

    wrapper = WikipediaAPIWrapper(top_k_results=2, doc_content_chars_max=2000)
    return wrapper.run(query)
55
+
56
+
57
# ── Tool 3: Calculator / Python Evaluator ───────────────────────────────────

@tool
def calculate(expression: str) -> str:
    """
    Evaluate a mathematical expression.
    Use this for any arithmetic, unit conversions, or numerical reasoning.

    Args:
        expression: A valid Python math expression, e.g. "2 ** 10 / 1024"
    Returns:
        The result as a string.
    """
    import math

    # Sandboxed eval: no builtins, only the math module's public names plus a
    # few numeric helpers the old sandbox was missing (abs/round/min/max/sum
    # are common in "numerical reasoning" expressions and used to fail with
    # NameError).
    allowed = {k: getattr(math, k) for k in dir(math) if not k.startswith('_')}
    allowed.update({"abs": abs, "round": round, "min": min, "max": max, "sum": sum})

    try:
        # SECURITY NOTE: eval on model-generated text. The empty __builtins__
        # blocks imports, but eval is never fully safe on untrusted input —
        # acceptable here only because input comes from our own LLM.
        result = eval(expression, {"__builtins__": {}}, allowed)
        return str(result)
    except Exception as e:
        return f"Error evaluating expression: {e}"
78
+
79
+
80
# ── Tool 4: Read Spreadsheet Files ──────────────────────────────────────────

@tool
def read_excel_file(task_id: str) -> str:
    """
    Download and read a spreadsheet (.xlsx or .csv) attached to a GAIA question.
    Use this when the question references a file or asks about data in a table.

    Args:
        task_id: The GAIA task ID for the question (not the filename).
    Returns:
        A string representation of the file's contents.
    """
    import io

    api_url = f"https://agents-course-unit4-scoring.hf.space/files/{task_id}"
    try:
        # BUGFIX: the request previously had no timeout and could hang the
        # agent indefinitely; network errors now return a message instead of
        # raising mid-graph.
        response = requests.get(api_url, timeout=30)
    except requests.RequestException as e:
        return f"Could not download file for task {task_id}: {e}"

    if response.status_code != 200:
        return f"Could not download file for task {task_id}."

    # Decide CSV vs Excel from the response headers.
    # BUGFIX: the old fallback checked task_id.endswith(".csv"), which can
    # never match — task_id is an ID, not a filename. Use the
    # content-disposition filename instead.
    content_type = response.headers.get("content-type", "")
    disposition = response.headers.get("content-disposition", "")
    is_csv = "csv" in content_type or ".csv" in disposition.lower()

    try:
        if is_csv:
            df = pd.read_csv(io.BytesIO(response.content))
        else:
            df = pd.read_excel(io.BytesIO(response.content))

        # Cap the dump at 50 rows so huge sheets don't blow the LLM context.
        return f"File contents (first 50 rows):\n{df.head(50).to_string()}"
    except Exception as e:
        return f"Error reading file: {e}"
113
+
114
+
115
# ── Collect all tools into a list ───────────────────────────────────────────

# Imported by agent.py: bound to the LLM and passed to ToolNode.
TOOLS = [web_search, wikipedia_search, calculate, read_excel_file]