yoshizen commited on
Commit
d227e0d
·
verified ·
1 Parent(s): 162ee47

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +220 -0
  2. deployment.py +347 -0
  3. gaia_sample_tasks.py +108 -0
  4. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main application file for the GAIA-Ready AI Agent web interface
3
+
4
+ This file serves as the entry point for the Hugging Face Spaces deployment.
5
+ It creates and launches a Gradio interface for the agent.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import gradio as gr
11
+ from typing import Dict, Any
12
+
13
+ # Ensure all necessary modules are installed
14
+ try:
15
+ import smolagents
16
+ except ImportError:
17
+ import subprocess
18
+ subprocess.check_call(["pip", "install", "smolagents"])
19
+
20
+ try:
21
+ import sentence_transformers
22
+ except ImportError:
23
+ import subprocess
24
+ subprocess.check_call(["pip", "install", "sentence-transformers"])
25
+
26
+ # Import the enhanced agent
27
+ try:
28
+ from enhanced_agent import EnhancedGAIAAgent
29
+ except ImportError:
30
+ print("Error: Could not import EnhancedGAIAAgent.")
31
+ print("Make sure enhanced_agent.py is in the same directory.")
32
+ sys.exit(1)
33
+
34
+ # Import optimized prompts if available
35
+ try:
36
+ from optimized_prompts import get_enhanced_system_prompt, get_enhanced_reasoning_template
37
+ USING_OPTIMIZED_PROMPTS = True
38
+ except ImportError:
39
+ print("Warning: Could not import optimized prompts.")
40
+ print("The agent will use default prompts.")
41
+ USING_OPTIMIZED_PROMPTS = False
42
+
43
+ # Check if running in Hugging Face Spaces
44
+ IS_HF_SPACES = os.environ.get("SPACE_ID") is not None
45
+
46
class AgentApp:
    """
    Gradio application for the GAIA-Ready AI Agent

    Wires a Gradio Blocks UI to an EnhancedGAIAAgent; the agent itself is
    created lazily on the first query so the UI can come up without a key.
    """
    def __init__(self, use_local_model: bool = False, use_semantic_memory: bool = True):
        """
        Initialize the agent application

        Args:
            use_local_model: Whether to use a local model via Ollama
            use_semantic_memory: Whether to use semantic search for memory retrieval
        """
        # Agent is constructed on first use in _process_query, not here.
        self.agent = None
        self.use_local_model = use_local_model
        self.use_semantic_memory = use_semantic_memory
        # Per-session list of {"query", "response", "success"} records.
        self.history = []
        # Default key comes from the HF_API_KEY environment variable; a key
        # typed into the UI overrides it (see _initialize_agent).
        self.api_key = os.environ.get("HF_API_KEY", "")

        # Initialize the interface
        self.interface = self._create_interface()

    def _initialize_agent(self, api_key: str = "") -> str:
        """
        Initialize the agent with the provided API key

        Args:
            api_key: Hugging Face API key

        Returns:
            Initialization status message
        """
        if api_key:
            self.api_key = api_key

        try:
            self.agent = EnhancedGAIAAgent(
                api_key=self.api_key,
                use_local_model=self.use_local_model,
                use_semantic_memory=self.use_semantic_memory
            )
            return "Agent initialized successfully!"
        except Exception as e:
            # Failure is reported as a status string; _process_query checks
            # for the "Error" prefix rather than catching an exception.
            return f"Error initializing agent: {str(e)}"

    def _process_query(self, query: str, api_key: str = "", max_iterations: int = 3) -> str:
        """
        Process a user query with the agent

        Args:
            query: The user's query
            api_key: Hugging Face API key (optional)
            max_iterations: Maximum number of iterations

        Returns:
            Agent's response
        """
        # Initialize agent if not already initialized or if API key changed
        if self.agent is None or (api_key and api_key != self.api_key):
            init_message = self._initialize_agent(api_key)
            if "Error" in init_message:
                return init_message

        try:
            # Process the query
            result = self.agent.solve(query, max_iterations=max_iterations, verbose=True)

            # Add to history
            self.history.append({
                "query": query,
                "response": result.get("answer", "No answer provided."),
                "success": result.get("success", False)
            })

            # Return the answer
            return result.get("answer", "I couldn't generate an answer for this query.")
        except Exception as e:
            error_message = f"Error processing query: {str(e)}"
            print(error_message)
            return error_message

    def _create_interface(self) -> gr.Blocks:
        """
        Create the Gradio interface

        Returns:
            Gradio Blocks interface
        """
        with gr.Blocks(title="GAIA-Ready AI Agent") as interface:
            gr.Markdown("# GAIA-Ready AI Agent")
            gr.Markdown("""
This AI agent is designed to excel at the GAIA benchmark from the Hugging Face Agents Course.
It implements the Think-Act-Observe workflow and includes tools for web search, calculation,
image analysis, and code execution.

Enter your query below and the agent will solve it step by step.
""")

            with gr.Row():
                with gr.Column(scale=3):
                    api_key_input = gr.Textbox(
                        label="Hugging Face API Key (optional)",
                        placeholder="Enter your Hugging Face API key here...",
                        type="password"
                    )

                with gr.Column(scale=1):
                    max_iterations_slider = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=3,
                        step=1,
                        label="Max Iterations"
                    )

            query_input = gr.Textbox(
                label="Your Query",
                placeholder="Enter your query here...",
                lines=3
            )

            submit_button = gr.Button("Submit")

            response_output = gr.Textbox(
                label="Agent Response",
                lines=15
            )

            # Sample queries
            gr.Markdown("### Sample Queries")
            sample_queries = [
                "What is the capital of France and what is its population? Also, calculate 15% of this population.",
                "Write a Python function to calculate the factorial of a number, then use it to find the factorial of 5.",
                "Compare and contrast renewable and non-renewable energy sources.",
                "Analyze this image: https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/800px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg"
            ]

            for query in sample_queries:
                sample_button = gr.Button(f"Try: {query[:50]}..." if len(query) > 50 else f"Try: {query}")
                # `q=query` binds the current query as a default argument so
                # each button keeps its own value (a bare closure would
                # late-bind every button to the last query in the list).
                sample_button.click(
                    fn=lambda q=query: q,
                    outputs=query_input
                )

            # Set up event handlers
            submit_button.click(
                fn=self._process_query,
                inputs=[query_input, api_key_input, max_iterations_slider],
                outputs=response_output
            )

            # Add examples
            gr.Examples(
                examples=sample_queries,
                inputs=query_input
            )

        return interface

    def launch(self, share: bool = False) -> None:
        """
        Launch the Gradio interface

        Args:
            share: Whether to create a public link
        """
        self.interface.launch(share=share)
212


# Create and launch the agent app at import time: Hugging Face Spaces imports
# this module and serves the module-level `interface` object directly.
app = AgentApp(use_local_model=False, use_semantic_memory=True)
interface = app.interface

# For Hugging Face Spaces — launch only when executed as a script
if __name__ == "__main__":
    interface.launch()
deployment.py ADDED
@@ -0,0 +1,347 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Deployment configuration for Hugging Face Spaces
3
+
4
+ This file contains the necessary configuration and setup for deploying
5
+ the GAIA-Ready AI Agent to Hugging Face Spaces.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import json
11
+ from typing import Dict, Any, List, Optional, Union
12
+
13
+ # Import required modules
14
+ try:
15
+ import gradio as gr
16
+ except ImportError:
17
+ import subprocess
18
+ subprocess.check_call(["pip", "install", "gradio"])
19
+ import gradio as gr
20
+
21
+ # Import the enhanced agent
22
+ try:
23
+ from enhanced_agent import EnhancedGAIAAgent
24
+ except ImportError:
25
+ print("Error: Could not import EnhancedGAIAAgent.")
26
+ print("Make sure enhanced_agent.py is in the same directory.")
27
+ sys.exit(1)
28
+
29
+ # Import optimized prompts
30
+ try:
31
+ from optimized_prompts import get_enhanced_system_prompt, get_enhanced_reasoning_template
32
+ except ImportError:
33
+ print("Warning: Could not import optimized prompts.")
34
+ print("The agent will use default prompts.")
35
+
36
+ # Check if running in Hugging Face Spaces
37
+ IS_HF_SPACES = os.environ.get("SPACE_ID") is not None
38
+
39
+ # Configuration for Hugging Face Spaces
40
+ HF_SPACES_CONFIG = {
41
+ "title": "GAIA-Ready AI Agent",
42
+ "description": "An advanced AI agent designed to excel at the GAIA benchmark from the Hugging Face Agents Course.",
43
+ "tags": ["agents", "gaia", "huggingface-course", "smolagents", "llm"],
44
+ "sdk": "gradio",
45
+ "sdk_version": "3.50.2",
46
+ "python_version": "3.11",
47
+ "app_file": "app.py",
48
+ "license": "mit"
49
+ }
50
+
51
class AgentApp:
    """
    Gradio application for the GAIA-Ready AI Agent

    Wires a Gradio Blocks UI to an EnhancedGAIAAgent; the agent itself is
    created lazily on the first query so the UI can come up without a key.
    """
    def __init__(self, use_local_model: bool = False, use_semantic_memory: bool = True):
        """
        Initialize the agent application

        Args:
            use_local_model: Whether to use a local model via Ollama
            use_semantic_memory: Whether to use semantic search for memory retrieval
        """
        # Agent is constructed on first use in _process_query, not here.
        self.agent = None
        self.use_local_model = use_local_model
        self.use_semantic_memory = use_semantic_memory
        # Per-session list of {"query", "response", "success"} records.
        self.history = []
        # Default key comes from the HF_API_KEY environment variable; a key
        # typed into the UI overrides it (see _initialize_agent).
        self.api_key = os.environ.get("HF_API_KEY", "")

        # Initialize the interface
        self.interface = self._create_interface()

    def _initialize_agent(self, api_key: str = "") -> str:
        """
        Initialize the agent with the provided API key

        Args:
            api_key: Hugging Face API key

        Returns:
            Initialization status message ("Agent initialized successfully!"
            or an "Error initializing agent: ..." string).
        """
        # BUG FIX: this method was annotated `-> None` but returns status
        # strings that _process_query inspects; annotation corrected to str.
        if api_key:
            self.api_key = api_key

        try:
            self.agent = EnhancedGAIAAgent(
                api_key=self.api_key,
                use_local_model=self.use_local_model,
                use_semantic_memory=self.use_semantic_memory
            )
            return "Agent initialized successfully!"
        except Exception as e:
            return f"Error initializing agent: {str(e)}"

    def _process_query(self, query: str, api_key: str = "", max_iterations: int = 3) -> str:
        """
        Process a user query with the agent

        Args:
            query: The user's query
            api_key: Hugging Face API key (optional)
            max_iterations: Maximum number of iterations

        Returns:
            Agent's response
        """
        # Initialize agent if not already initialized or if API key changed
        if self.agent is None or (api_key and api_key != self.api_key):
            init_message = self._initialize_agent(api_key)
            if "Error" in init_message:
                return init_message

        try:
            # Process the query
            result = self.agent.solve(query, max_iterations=max_iterations, verbose=True)

            # Add to history
            self.history.append({
                "query": query,
                "response": result.get("answer", "No answer provided."),
                "success": result.get("success", False)
            })

            # Return the answer
            return result.get("answer", "I couldn't generate an answer for this query.")
        except Exception as e:
            error_message = f"Error processing query: {str(e)}"
            print(error_message)
            return error_message

    def _create_interface(self) -> gr.Blocks:
        """
        Create the Gradio interface

        Returns:
            Gradio Blocks interface
        """
        with gr.Blocks(title="GAIA-Ready AI Agent") as interface:
            gr.Markdown("# GAIA-Ready AI Agent")
            gr.Markdown("""
This AI agent is designed to excel at the GAIA benchmark from the Hugging Face Agents Course.
It implements the Think-Act-Observe workflow and includes tools for web search, calculation,
image analysis, and code execution.

Enter your query below and the agent will solve it step by step.
""")

            with gr.Row():
                with gr.Column(scale=3):
                    api_key_input = gr.Textbox(
                        label="Hugging Face API Key (optional)",
                        placeholder="Enter your Hugging Face API key here...",
                        type="password"
                    )

                with gr.Column(scale=1):
                    max_iterations_slider = gr.Slider(
                        minimum=1,
                        maximum=5,
                        value=3,
                        step=1,
                        label="Max Iterations"
                    )

            query_input = gr.Textbox(
                label="Your Query",
                placeholder="Enter your query here...",
                lines=3
            )

            submit_button = gr.Button("Submit")

            response_output = gr.Textbox(
                label="Agent Response",
                lines=15
            )

            # Sample queries
            gr.Markdown("### Sample Queries")
            sample_queries = [
                "What is the capital of France and what is its population? Also, calculate 15% of this population.",
                "Write a Python function to calculate the factorial of a number, then use it to find the factorial of 5.",
                "Compare and contrast renewable and non-renewable energy sources.",
                "Analyze this image: https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/800px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg"
            ]

            for query in sample_queries:
                sample_button = gr.Button(f"Try: {query[:50]}..." if len(query) > 50 else f"Try: {query}")
                # `q=query` binds the current query as a default argument so
                # each button keeps its own value (a bare closure would
                # late-bind every button to the last query in the list).
                sample_button.click(
                    fn=lambda q=query: q,
                    outputs=query_input
                )

            # Set up event handlers
            submit_button.click(
                fn=self._process_query,
                inputs=[query_input, api_key_input, max_iterations_slider],
                outputs=response_output
            )

            # Add examples
            gr.Examples(
                examples=sample_queries,
                inputs=query_input
            )

        return interface

    def launch(self, share: bool = False) -> None:
        """
        Launch the Gradio interface

        Args:
            share: Whether to create a public link
        """
        self.interface.launch(share=share)
214
+
215
+
216
def create_requirements_file() -> None:
    """
    Create requirements.txt file for Hugging Face Spaces
    """
    # Minimum pinned versions for every runtime dependency of the Space.
    pinned_dependencies = (
        "smolagents>=0.1.0",
        "sentence-transformers>=2.2.2",
        "gradio>=3.50.2",
        "requests>=2.31.0",
        "beautifulsoup4>=4.12.2",
        "numpy>=1.24.3",
        "matplotlib>=3.7.1",
        "pillow>=9.5.0",
    )

    with open("requirements.txt", "w") as req_file:
        req_file.write("\n".join(pinned_dependencies))

    print("Created requirements.txt file")
235
+
236
+
237
def create_readme_file() -> None:
    """
    Create README.md file for Hugging Face Spaces
    """
    # NOTE(review): the markdown body is kept flush-left inside the literal so
    # headings stay valid after .strip(); confirm this matches the intended
    # layout, and note HF Spaces also expects YAML front matter in README.md.
    readme_content = """
# GAIA-Ready AI Agent

This AI agent is designed to excel at the GAIA benchmark from the Hugging Face Agents Course.

## Features

- Implements the Think-Act-Observe workflow
- Includes tools for web search, calculation, image analysis, and code execution
- Uses advanced memory and reasoning systems
- Optimized for the GAIA benchmark

## Usage

1. Enter your Hugging Face API key (optional)
2. Set the maximum number of iterations
3. Enter your query
4. Click Submit

## Sample Queries

- "What is the capital of France and what is its population? Also, calculate 15% of this population."
- "Write a Python function to calculate the factorial of a number, then use it to find the factorial of 5."
- "Compare and contrast renewable and non-renewable energy sources."
- "Analyze this image: [Mona Lisa](https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/800px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg)"

## How It Works

The agent uses a three-step workflow:

1. **Think**: Analyze the task and plan an approach
2. **Act**: Use appropriate tools to gather information or perform actions
3. **Observe**: Analyze the results and adjust the approach if needed

## Development

This agent was developed as part of the Hugging Face Agents Course. It uses the smolagents framework and is optimized for the GAIA benchmark.
"""

    # strip() drops only the leading/trailing blank lines of the literal.
    with open("README.md", "w") as f:
        f.write(readme_content.strip())

    print("Created README.md file")
284
+
285
+
286
def create_app_file() -> None:
    """
    Create app.py file for Hugging Face Spaces
    """
    # The generated entry point imports AgentApp from deployment.py, so
    # deployment.py must be uploaded to the Space alongside this file.
    # NOTE(review): this overwrites any existing app.py in the working
    # directory — including the hand-written one in this repo.
    app_content = """
import os
import sys
from deployment import AgentApp

# Create and launch the agent app
app = AgentApp(use_local_model=False, use_semantic_memory=True)
interface = app.interface

# For Hugging Face Spaces
if __name__ == "__main__":
    interface.launch()
"""

    with open("app.py", "w") as f:
        f.write(app_content.strip())

    print("Created app.py file")
308
+
309
+
310
def prepare_for_deployment() -> None:
    """
    Prepare all necessary files for deployment to Hugging Face Spaces
    """
    print("Preparing for deployment to Hugging Face Spaces...")

    # Generate each deployment artifact in turn.
    create_requirements_file()
    create_readme_file()
    create_app_file()

    # Keep caches, secrets and the agent's persisted memory out of the repo.
    with open(".gitignore", "w") as gitignore:
        gitignore.write("__pycache__/\n*.py[cod]\n*$py.class\n.env\n*.json\nagent_memory.json\n")

    instructions = (
        "All deployment files created successfully!",
        "To deploy to Hugging Face Spaces:",
        "1. Create a new Space on Hugging Face",
        "2. Select Gradio as the SDK",
        "3. Upload all the files in this directory",
        "4. Set the HF_API_KEY environment variable in the Space settings",
    )
    for line in instructions:
        print(line)
335
+
336
+
337
# Example usage
if __name__ == "__main__":
    # Prepare for deployment: writes requirements.txt, README.md, app.py
    # and .gitignore into the current working directory.
    prepare_for_deployment()

    # Test the app locally
    print("\nTesting the app locally...")
    app = AgentApp(use_local_model=False, use_semantic_memory=True)

    # Launch with share=True to create a public link
    # (share=True opens a temporary public gradio.live tunnel)
    app.launch(share=True)
gaia_sample_tasks.py ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GAIA Sample Tasks for Testing the AI Agent
3
+
4
+ This file contains sample tasks from the GAIA benchmark categories
5
+ to test the agent's capabilities across different skills.
6
+ """
7
+
8
+ # Sample GAIA tasks for testing the agent
9
+ GAIA_SAMPLE_TASKS = [
10
+ # Reasoning tasks
11
+ {
12
+ "category": "reasoning",
13
+ "difficulty": "easy",
14
+ "task": "If a train travels at 60 miles per hour, how far will it travel in 2.5 hours?"
15
+ },
16
+ {
17
+ "category": "reasoning",
18
+ "difficulty": "medium",
19
+ "task": "A store is having a 30% off sale. If an item originally costs $85, what is the sale price? Additionally, if there's a 8% sales tax, what is the final price?"
20
+ },
21
+ {
22
+ "category": "reasoning",
23
+ "difficulty": "hard",
24
+ "task": "In a class of 30 students, 40% are boys. If 3 more girls join the class, what percentage of the class will be boys?"
25
+ },
26
+
27
+ # Web search and information retrieval tasks
28
+ {
29
+ "category": "web_search",
30
+ "difficulty": "easy",
31
+ "task": "What is the capital of Japan and what is its population?"
32
+ },
33
+ {
34
+ "category": "web_search",
35
+ "difficulty": "medium",
36
+ "task": "Who won the Nobel Prize in Physics in 2023? What was their contribution?"
37
+ },
38
+ {
39
+ "category": "web_search",
40
+ "difficulty": "hard",
41
+ "task": "Compare and contrast the climate policies of the United States and the European Union. What are the key differences in their approaches to reducing carbon emissions?"
42
+ },
43
+
44
+ # Multimodal understanding tasks (would require image input in a real scenario)
45
+ {
46
+ "category": "multimodal",
47
+ "difficulty": "easy",
48
+ "task": "Analyze this image URL and describe what you see: https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/800px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg"
49
+ },
50
+ {
51
+ "category": "multimodal",
52
+ "difficulty": "medium",
53
+ "task": "Look at this chart image and explain the trend: https://upload.wikimedia.org/wikipedia/commons/thumb/5/51/Global-surface-temperature.svg/1200px-Global-surface-temperature.svg.png"
54
+ },
55
+
56
+ # Tool usage tasks
57
+ {
58
+ "category": "tool_usage",
59
+ "difficulty": "easy",
60
+ "task": "Write a Python function to calculate the factorial of a number, then use it to find the factorial of 5."
61
+ },
62
+ {
63
+ "category": "tool_usage",
64
+ "difficulty": "medium",
65
+ "task": "Create a Python script that fetches the current weather for New York City using a weather API and displays the temperature, humidity, and weather conditions."
66
+ },
67
+ {
68
+ "category": "tool_usage",
69
+ "difficulty": "hard",
70
+ "task": "Write a Python script that analyzes a text file containing a list of numbers (one per line), calculates the mean, median, mode, and standard deviation, and creates a histogram visualization of the data."
71
+ },
72
+
73
+ # Combined skills tasks
74
+ {
75
+ "category": "combined",
76
+ "difficulty": "medium",
77
+ "task": "Research the top 3 electric vehicle manufacturers by market share. Create a Python script to visualize their market shares in a pie chart."
78
+ },
79
+ {
80
+ "category": "combined",
81
+ "difficulty": "hard",
82
+ "task": "Find information about global coffee production by country for the last year. Write a Python script to create a bar chart showing the top 5 coffee-producing countries and their production volumes."
83
+ }
84
+ ]
85
+
86
+ # Function to get tasks by category
87
+ def get_tasks_by_category(category):
88
+ return [task for task in GAIA_SAMPLE_TASKS if task["category"] == category]
89
+
90
+ # Function to get tasks by difficulty
91
+ def get_tasks_by_difficulty(difficulty):
92
+ return [task for task in GAIA_SAMPLE_TASKS if task["difficulty"] == difficulty]
93
+
94
+ # Function to get all task queries as a list
95
+ def get_all_task_queries():
96
+ return [task["task"] for task in GAIA_SAMPLE_TASKS]
97
+
98
+ # Function to get a subset of tasks for quick testing
99
+ def get_quick_test_tasks():
100
+ # One task from each category and difficulty level
101
+ quick_test_tasks = [
102
+ GAIA_SAMPLE_TASKS[0], # reasoning, easy
103
+ GAIA_SAMPLE_TASKS[3], # web_search, easy
104
+ GAIA_SAMPLE_TASKS[6], # multimodal, easy
105
+ GAIA_SAMPLE_TASKS[9], # tool_usage, medium
106
+ GAIA_SAMPLE_TASKS[11] # combined, medium
107
+ ]
108
+ return [task["task"] for task in quick_test_tasks]
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ smolagents>=0.1.0
2
+ sentence-transformers>=2.2.2
3
+ gradio>=3.50.2
4
+ requests>=2.31.0
5
+ beautifulsoup4>=4.12.2
6
+ numpy>=1.24.3
7
+ matplotlib>=3.7.1
8
+ pillow>=9.5.0