| """ |
| GAIA Sample Tasks for Testing the AI Agent |
| |
| This file contains sample tasks from the GAIA benchmark categories |
| to test the agent's capabilities across different skills. |
| """ |
|
|
| |
# Sample task records. Every record is a dict with exactly three string keys:
#   "category"   - skill area the task exercises
#   "difficulty" - "easy", "medium" or "hard"
#   "task"       - the prompt text handed to the agent
# Records are grouped by category, and ordered easiest-first within a group.
GAIA_SAMPLE_TASKS = [
    # ---- reasoning: arithmetic / percentage word problems ----
    {
        "category": "reasoning",
        "difficulty": "easy",
        "task": (
            "If a train travels at 60 miles per hour, "
            "how far will it travel in 2.5 hours?"
        ),
    },
    {
        "category": "reasoning",
        "difficulty": "medium",
        "task": (
            "A store is having a 30% off sale. "
            "If an item originally costs $85, what is the sale price? "
            "Additionally, if there's a 8% sales tax, what is the final price?"
        ),
    },
    {
        "category": "reasoning",
        "difficulty": "hard",
        "task": (
            "In a class of 30 students, 40% are boys. "
            "If 3 more girls join the class, "
            "what percentage of the class will be boys?"
        ),
    },
    # ---- web_search: factual lookups and comparisons ----
    {
        "category": "web_search",
        "difficulty": "easy",
        "task": "What is the capital of Japan and what is its population?",
    },
    {
        "category": "web_search",
        "difficulty": "medium",
        "task": "Who won the Nobel Prize in Physics in 2023? What was their contribution?",
    },
    {
        "category": "web_search",
        "difficulty": "hard",
        "task": (
            "Compare and contrast the climate policies of the United States "
            "and the European Union. What are the key differences in their "
            "approaches to reducing carbon emissions?"
        ),
    },
    # ---- multimodal: prompts that reference an image URL ----
    # (only easy and medium entries exist for this category)
    # URLs are kept on a single line so they stay searchable.
    {
        "category": "multimodal",
        "difficulty": "easy",
        "task": (
            "Analyze this image URL and describe what you see: "
            "https://upload.wikimedia.org/wikipedia/commons/thumb/e/ec/Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg/800px-Mona_Lisa%2C_by_Leonardo_da_Vinci%2C_from_C2RMF_retouched.jpg"
        ),
    },
    {
        "category": "multimodal",
        "difficulty": "medium",
        "task": (
            "Look at this chart image and explain the trend: "
            "https://upload.wikimedia.org/wikipedia/commons/thumb/5/51/Global-surface-temperature.svg/1200px-Global-surface-temperature.svg.png"
        ),
    },
    # ---- tool_usage: prompts that ask for Python code ----
    {
        "category": "tool_usage",
        "difficulty": "easy",
        "task": (
            "Write a Python function to calculate the factorial of a number, "
            "then use it to find the factorial of 5."
        ),
    },
    {
        "category": "tool_usage",
        "difficulty": "medium",
        "task": (
            "Create a Python script that fetches the current weather for "
            "New York City using a weather API and displays the temperature, "
            "humidity, and weather conditions."
        ),
    },
    {
        "category": "tool_usage",
        "difficulty": "hard",
        "task": (
            "Write a Python script that analyzes a text file containing a list "
            "of numbers (one per line), calculates the mean, median, mode, and "
            "standard deviation, and creates a histogram visualization of the "
            "data."
        ),
    },
    # ---- combined: research followed by a Python visualization ----
    # (only medium and hard entries exist for this category)
    {
        "category": "combined",
        "difficulty": "medium",
        "task": (
            "Research the top 3 electric vehicle manufacturers by market share. "
            "Create a Python script to visualize their market shares in a "
            "pie chart."
        ),
    },
    {
        "category": "combined",
        "difficulty": "hard",
        "task": (
            "Find information about global coffee production by country for the "
            "last year. Write a Python script to create a bar chart showing the "
            "top 5 coffee-producing countries and their production volumes."
        ),
    },
]
|
|
| |
def get_tasks_by_category(category):
    """Return every sample task whose ``"category"`` equals *category*.

    Args:
        category: Value compared (with ``==``) against each task's
            ``"category"`` entry.

    Returns:
        A new list holding the matching task dicts from
        ``GAIA_SAMPLE_TASKS``, in their original order. The list is empty
        when nothing matches; the dicts themselves are not copied.
    """
    selected = []
    for entry in GAIA_SAMPLE_TASKS:
        if entry["category"] == category:
            selected.append(entry)
    return selected
|
|
| |
def get_tasks_by_difficulty(difficulty):
    """Return every sample task whose ``"difficulty"`` equals *difficulty*.

    Args:
        difficulty: Value compared (with ``==``) against each task's
            ``"difficulty"`` entry.

    Returns:
        A new list holding the matching task dicts from
        ``GAIA_SAMPLE_TASKS``, in their original order. The list is empty
        when nothing matches; the dicts themselves are not copied.
    """

    def is_requested_level(entry):
        return entry["difficulty"] == difficulty

    return list(filter(is_requested_level, GAIA_SAMPLE_TASKS))
|
|
| |
def get_all_task_queries():
    """Return the prompt text of every sample task.

    Returns:
        A new list with the ``"task"`` string of each entry in
        ``GAIA_SAMPLE_TASKS``, in list order.
    """
    queries = []
    for entry in GAIA_SAMPLE_TASKS:
        queries.append(entry["task"])
    return queries
|
|
| |
def get_quick_test_tasks():
    """Return a short smoke-test set: the first task prompt of each category.

    The selection is derived from the data rather than from fixed list
    positions, so adding, removing or reordering entries in
    ``GAIA_SAMPLE_TASKS`` cannot make it pick the wrong record or raise
    ``IndexError``.

    Returns:
        A list of ``"task"`` strings, one per distinct ``"category"``,
        ordered by each category's first appearance in
        ``GAIA_SAMPLE_TASKS``. For every category the first listed entry
        is used.
    """
    # NOTE(review): this used to index positions 0, 3, 6, 9 and 11. Position 9
    # is the *medium* tool_usage task, while every other position was the first
    # entry of its category; first-per-category now yields tool_usage's first
    # entry instead. Confirm this matches the intended quick-test set.
    first_query_by_category = {}
    for entry in GAIA_SAMPLE_TASKS:
        # setdefault only stores a value the first time a category is seen;
        # dicts preserve insertion order, so category order is stable.
        first_query_by_category.setdefault(entry["category"], entry["task"])
    return list(first_query_by_category.values())
|
|