"""Gradio front-end that runs a smolagents ``CodeAgent`` on GAIA questions.

The app exposes a two-step flow: first generate answers for every question
served by the scoring API, then submit the generated answers for scoring.
"""

import os
import re
from dataclasses import dataclass
from typing import Any

import gradio as gr
import pandas as pd
import requests
from smolagents import CodeAgent, OpenAIServerModel, tool
from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool

DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
DEFAULT_OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

# "FINAL:" at the very start of an answer, any casing.
_LEADING_FINAL_RE = re.compile(r"^FINAL\s*:\s*", re.IGNORECASE)
# "FINAL ANSWER:" anywhere in the answer, any casing ("Final Answer:", ...).
_FINAL_ANSWER_RE = re.compile(r"FINAL\s+ANSWER\s*:", re.IGNORECASE)


@dataclass
class AgentConfig:
    """Runtime settings for the agent and the scoring API client."""

    # Base URL of the scoring service (questions, files, submit endpoints).
    api_base_url: str = DEFAULT_API_URL
    # Model id passed to OpenAIServerModel.
    openai_model: str = DEFAULT_OPENAI_MODEL
    # Name of the environment variable that holds the API key.
    openai_api_key_env: str = "OPENAI_API_KEY"
    # OpenAI-compatible endpoint; read from the environment at import time.
    openai_api_base: str = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
    # Maximum number of agent steps per question.
    max_steps: int = 8
    # Timeout (seconds) for attachment downloads.
    web_timeout_sec: int = 15
    # Text attachments longer than this are truncated.
    max_file_chars: int = 12000


def normalize_answer(text: str) -> str:
    """Strip answer labels and surrounding quotes from a model answer.

    Removes ``FINAL:`` prefixes and ``FINAL ANSWER:`` labels regardless of
    casing, then trims whitespace and surrounding double quotes. Labels are
    removed before quotes so that ``FINAL ANSWER: "42"`` yields ``42`` rather
    than a string with a dangling quote.

    Args:
        text: Raw answer text; ``None`` or empty is tolerated.

    Returns:
        The cleaned answer, or ``"unknown"`` when nothing is left.
    """
    value = _FINAL_ANSWER_RE.sub("", text or "")
    previous = None
    # Repeat until stable: a label may sit inside quotes and vice versa.
    # Each pass can only shorten the string, so the loop terminates.
    while value != previous:
        previous = value
        value = value.strip().strip('"').strip()
        value = _LEADING_FINAL_RE.sub("", value)
    return value or "unknown"


def fetch_questions(api_base_url: str) -> list[dict[str, Any]]:
    """Download the question list from ``{api_base_url}/questions``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the JSON payload is not a list.
    """
    response = requests.get(f"{api_base_url}/questions", timeout=20)
    response.raise_for_status()
    data = response.json()
    if not isinstance(data, list):
        raise ValueError("Invalid /questions response format.")
    return data


def submit_answers(api_base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
    """POST ``payload`` as JSON to ``{api_base_url}/submit`` and return the reply.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    response = requests.post(f"{api_base_url}/submit", json=payload, timeout=90)
    response.raise_for_status()
    return response.json()


class GAIASmolAgent:
    """Wraps a smolagents ``CodeAgent`` configured with GAIA helper tools."""

    def __init__(self, config: AgentConfig):
        """Build the model, the HTTP session and the tool-equipped agent.

        Raises:
            ValueError: If the API key environment variable is unset or empty.
        """
        self.config = config
        api_key = os.getenv(config.openai_api_key_env)
        if not api_key:
            raise ValueError(f"Missing required secret: {config.openai_api_key_env}")

        self.model = OpenAIServerModel(
            model_id=config.openai_model,
            api_base=config.openai_api_base,
            api_key=api_key,
            temperature=0.0,
            max_tokens=1200,
        )
        self.http = requests.Session()
        self.http.headers.update({"User-Agent": "gaia-smolagent/1.0"})

        # The tools are closures so that fetch_gaia_file can reach
        # self.config and self.http. Their docstrings are kept verbatim
        # because they are part of the tool definition, not mere comments.

        @tool
        def fetch_gaia_file(task_id: str) -> str:
            """
            Fetch and read the file attached to a GAIA task.

            Args:
                task_id: The GAIA task id.
            """
            url = f"{self.config.api_base_url}/files/{task_id}"
            try:
                response = self.http.get(url, timeout=self.config.web_timeout_sec)
                if response.status_code >= 400:
                    return f"TOOL_ERROR: could not fetch file for task {task_id}. HTTP {response.status_code}"

                content_type = (response.headers.get("content-type") or "").lower()
                if "text" in content_type or "json" in content_type or "csv" in content_type:
                    text = response.text
                    # NOTE(review): collapsing all whitespace also removes
                    # newlines, so CSV rows and code indentation are lost.
                    # Confirm this is intended.
                    text = re.sub(r"\s+", " ", text).strip()
                    if len(text) > self.config.max_file_chars:
                        text = text[: self.config.max_file_chars] + " ...[truncated]"
                    return text

                # Non-text payloads are only described, not decoded.
                size = len(response.content or b"")
                return f"Binary file fetched. Content-Type: {content_type or 'unknown'}, bytes: {size}"
            except requests.RequestException as e:
                return f"TOOL_ERROR: request failed: {e}"

        @tool
        def add_numbers(a: float, b: float) -> float:
            """
            Add two numbers.

            Args:
                a: First number.
                b: Second number.
            """
            return a + b

        @tool
        def subtract_numbers(a: float, b: float) -> float:
            """
            Subtract two numbers.

            Args:
                a: First number.
                b: Second number.
            """
            return a - b

        @tool
        def multiply_numbers(a: float, b: float) -> float:
            """
            Multiply two numbers.

            Args:
                a: First number.
                b: Second number.
            """
            return a * b

        @tool
        def divide_numbers(a: float, b: float) -> float:
            """
            Divide two numbers.

            Args:
                a: Numerator.
                b: Denominator.
            """
            # Division by zero yields +inf instead of raising.
            if b == 0:
                return float("inf")
            return a / b

        @tool
        def power_number(base: float, exponent: float) -> float:
            """
            Raise a number to a power.

            Args:
                base: Base value.
                exponent: Exponent value.
            """
            return base**exponent

        self.agent = CodeAgent(
            model=self.model,
            tools=[
                fetch_gaia_file,
                add_numbers,
                subtract_numbers,
                multiply_numbers,
                divide_numbers,
                power_number,
                DuckDuckGoSearchTool(),
                VisitWebpageTool(),
            ],
            max_steps=self.config.max_steps,
            add_base_tools=False,
        )

    def solve_task(self, task_id: str, question: str) -> tuple[str, dict[str, Any]]:
        """Run the agent on one question.

        Args:
            task_id: GAIA task id, embedded in the prompt for file lookups.
            question: The question text.

        Returns:
            ``(answer, meta)`` where ``answer`` has been passed through
            ``normalize_answer`` and ``meta`` holds ``status``, ``steps`` and
            ``tools`` entries.
        """
        prompt = (
            "You are solving one GAIA benchmark question.\n"
            "You must use tools when needed (duckduckgo search, webpage visit, arithmetic, fetch_gaia_file).\n"
            "Critical scoring rule: exact match. Return only the final answer text, nothing else.\n"
            "Never include labels like 'FINAL ANSWER'.\n\n"
            f"Task ID: {task_id}\n"
            f"Question: {question}\n\n"
            "If the question depends on an attached file, call fetch_gaia_file(task_id) with the exact task id."
        )
        result = self.agent.run(prompt, reset=True)
        answer = normalize_answer(str(result))
        meta = {
            "status": "ok",
            # Falls back to 0 when the agent exposes no "logs" attribute.
            "steps": len(getattr(self.agent, "logs", []) or []),
            "tools": "smolagents",
        }
        return answer, meta


def _agent_code_url() -> str:
    """Return the Space's code URL built from ``SPACE_ID``, or a placeholder."""
    space_id = os.getenv("SPACE_ID")
    if space_id:
        return f"https://huggingface.co/spaces/{space_id}/tree/main"
    return "https://huggingface.co/spaces/unknown/tree/main"


def generate_answers(
    profile: gr.OAuthProfile | None,
) -> tuple[str, pd.DataFrame | None, list[dict[str, str]], str]:
    """Fetch all questions and run the agent on each of them.

    Args:
        profile: The logged-in Hugging Face profile, or ``None``.

    Returns:
        ``(status message, results table or None, answers payload, username)``
        matching the four outputs wired to the "Generate Answers" button.
    """
    if not profile:
        return "Please login to Hugging Face first.", None, [], ""

    username = profile.username.strip()
    config = AgentConfig()

    try:
        questions = fetch_questions(config.api_base_url)
    except Exception as e:
        return f"Failed to fetch questions: {e}", None, [], username

    try:
        agent = GAIASmolAgent(config=config)
    except Exception as e:
        return f"Failed to initialize smolagents agent: {e}", None, [], username

    answers_payload: list[dict[str, str]] = []
    rows: list[dict[str, Any]] = []

    for item in questions:
        # fetch_questions only checks the outer list, so skip malformed
        # entries instead of crashing the whole run on item.get.
        if not isinstance(item, dict):
            continue
        task_id = item.get("task_id")
        question_text = item.get("question")
        if not task_id or question_text is None:
            continue

        try:
            answer, meta = agent.solve_task(task_id=str(task_id), question=str(question_text))
            answers_payload.append({"task_id": str(task_id), "submitted_answer": answer})
            rows.append(
                {
                    "Task ID": str(task_id),
                    "Question": str(question_text),
                    "Submitted Answer": answer,
                    "Status": meta["status"],
                    "Steps": meta["steps"],
                    "Tools": meta["tools"],
                }
            )
        except Exception as e:
            # One failing task must not abort the run. The failure is shown
            # in the table but no answer is queued for submission.
            rows.append(
                {
                    "Task ID": str(task_id),
                    "Question": str(question_text),
                    "Submitted Answer": "unknown",
                    "Status": f"agent_error: {e}",
                    "Steps": 0,
                    "Tools": "smolagents",
                }
            )

    if not answers_payload:
        return "No answers were generated.", pd.DataFrame(rows), [], username

    status = (
        f"Generated {len(answers_payload)} answers for user '{username}'. "
        "Review the table, then click submit."
    )
    return status, pd.DataFrame(rows), answers_payload, username


def submit_generated_answers(answers_payload: list[dict[str, str]], username: str) -> str:
    """Submit previously generated answers to the scoring API.

    Args:
        answers_payload: Entries with ``task_id`` and ``submitted_answer``.
        username: Hugging Face username stored by ``generate_answers``.

    Returns:
        A human-readable status message (success summary or failure reason).
    """
    if not username:
        return "Missing username in session. Click 'Generate Answers' after logging in."
    if not answers_payload:
        return "No generated answers found. Click 'Generate Answers' first."

    clean_answers: list[dict[str, str]] = []
    for item in answers_payload:
        task_id = str(item.get("task_id", "")).strip()
        submitted = normalize_answer(str(item.get("submitted_answer", "")))
        if not task_id:
            continue
        clean_answers.append({"task_id": task_id, "submitted_answer": submitted})

    if not clean_answers:
        return "Generated answers are invalid or empty."

    payload = {
        "username": username,
        "agent_code": _agent_code_url(),
        "answers": clean_answers,
    }

    try:
        # Use the same configured base URL that generate_answers fetched from.
        result = submit_answers(AgentConfig().api_base_url, payload)
        return (
            f"Submission Successful!\n"
            f"User: {result.get('username', username)}\n"
            f"Overall Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)\n"
            f"Message: {result.get('message', 'No message received.')}"
        )
    except requests.exceptions.HTTPError as e:
        response = e.response
        if response is None:
            return f"Submission failed: {e}"
        detail = f"HTTP {response.status_code}"
        try:
            body = response.json()
            detail = f"{detail} - {body.get('detail', body)}"
        except Exception:
            # Body is not JSON (or not a mapping): fall back to raw text.
            detail = f"{detail} - {response.text[:500]}"
        return f"Submission failed: {detail}"
    except Exception as e:
        return f"Submission failed: {e}"


with gr.Blocks() as demo:
    gr.Markdown("# GAIA smolagents Runner")
    gr.Markdown(
        """
        Two-step flow:
        1. Generate answers for all tasks.
        2. Submit generated answers to leaderboard scoring.

        Required Space secrets:
        - `OPENAI_API_KEY`

        Optional:
        - `OPENAI_MODEL` (default: `gpt-4o-mini`)
        - `OPENAI_API_BASE` (default: `https://api.openai.com/v1`)
        """
    )

    gr.LoginButton()

    # Per-session storage handed from the generate step to the submit step.
    generated_answers_state = gr.State([])
    username_state = gr.State("")

    with gr.Row():
        generate_button = gr.Button("1) Generate Answers", variant="primary")
        submit_button = gr.Button("2) Submit Generated Answers")

    status_output = gr.Textbox(label="Status", lines=6, interactive=False)
    results_table = gr.DataFrame(label="Generated Answers", wrap=True)

    # No explicit inputs: generate_answers only takes the OAuth profile.
    generate_button.click(
        fn=generate_answers,
        outputs=[status_output, results_table, generated_answers_state, username_state],
    )
    submit_button.click(
        fn=submit_generated_answers,
        inputs=[generated_answers_state, username_state],
        outputs=[status_output],
    )


if __name__ == "__main__":
    print("\n" + "-" * 30 + " App Starting " + "-" * 30)
    print(f"SPACE_HOST: {os.getenv('SPACE_HOST', 'not set')}")
    print(f"SPACE_ID: {os.getenv('SPACE_ID', 'not set')}")
    print("-" * (60 + len(" App Starting ")) + "\n")
    demo.launch(debug=True, share=False)