Spaces:
Sleeping
Sleeping
| import os | |
| import re | |
| from dataclasses import dataclass | |
| from typing import Any | |
| import gradio as gr | |
| import pandas as pd | |
| import requests | |
| from smolagents import CodeAgent, OpenAIServerModel, tool | |
| from smolagents.default_tools import DuckDuckGoSearchTool, VisitWebpageTool | |
# Base URL of the scoring service (serves /questions, /files/<id>, /submit).
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
# Model id used unless the OPENAI_MODEL environment variable overrides it.
DEFAULT_OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")


@dataclass
class AgentConfig:
    """Runtime settings for the agent.

    Declared as a dataclass so that individual fields can be overridden per
    instance (``AgentConfig(max_steps=4)``) while ``AgentConfig()`` keeps
    returning the defaults below.
    """

    # Base URL of the scoring API.
    api_base_url: str = DEFAULT_API_URL
    # Model id passed to the OpenAI-compatible server.
    openai_model: str = DEFAULT_OPENAI_MODEL
    # Name of the environment variable that holds the API key.
    openai_api_key_env: str = "OPENAI_API_KEY"
    # Base URL of the OpenAI-compatible endpoint (read once, at import time).
    openai_api_base: str = os.getenv("OPENAI_API_BASE", "https://api.openai.com/v1")
    # Maximum number of agent steps per question.
    max_steps: int = 8
    # Timeout, in seconds, for file downloads made by the agent's file tool.
    web_timeout_sec: int = 15
    # Maximum number of characters of a text attachment returned to the agent.
    max_file_chars: int = 12000
# Leading "FINAL:" label, in any casing.
_FINAL_PREFIX_RE = re.compile(r"^FINAL\s*:\s*", re.IGNORECASE)
# "FINAL ANSWER:" label anywhere in the text, in any casing.
_FINAL_ANSWER_LABEL_RE = re.compile(r"FINAL\s+ANSWER\s*:", re.IGNORECASE)


def normalize_answer(text: str) -> str:
    """Reduce raw agent output to the bare answer string.

    Removes surrounding whitespace and double quotes, a leading ``FINAL:``
    label and any ``FINAL ANSWER:`` label. Labels are matched
    case-insensitively, so ``Final Answer:`` and ``final answer:`` are
    handled as well.

    Args:
        text: Raw agent output; ``None`` or an empty string is accepted.

    Returns:
        The cleaned answer, or ``"unknown"`` when nothing is left.
    """
    value = (text or "").strip()
    # Strip quotes first so a label inside a fully quoted answer is still
    # found at the start of the string.
    value = value.strip('"').strip()
    value = _FINAL_ANSWER_LABEL_RE.sub("", value).strip()
    value = _FINAL_PREFIX_RE.sub("", value).strip()
    # Strip quotes again: removing a label can expose a quoted answer.
    value = value.strip('"').strip()
    return value or "unknown"
def fetch_questions(api_base_url: str) -> list[dict[str, Any]]:
    """Download the list of questions from the scoring API.

    Args:
        api_base_url: Base URL of the scoring API (no trailing slash).

    Returns:
        The decoded JSON list returned by ``GET /questions``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If the decoded JSON payload is not a list.
    """
    endpoint = f"{api_base_url}/questions"
    reply = requests.get(endpoint, timeout=20)
    reply.raise_for_status()

    questions = reply.json()
    if isinstance(questions, list):
        return questions
    raise ValueError("Invalid /questions response format.")
def submit_answers(api_base_url: str, payload: dict[str, Any]) -> dict[str, Any]:
    """Send a submission to the scoring API and return its decoded reply.

    Args:
        api_base_url: Base URL of the scoring API (no trailing slash).
        payload: JSON-serialisable submission body.

    Returns:
        The decoded JSON body of the ``POST /submit`` response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    endpoint = f"{api_base_url}/submit"
    reply = requests.post(endpoint, json=payload, timeout=90)
    reply.raise_for_status()
    return reply.json()
class GAIASmolAgent:
    """smolagents ``CodeAgent`` wrapper that answers GAIA benchmark questions.

    The agent is equipped with web search, webpage visiting, basic arithmetic
    and a tool that downloads the file attached to a task from the scoring
    API.
    """

    def __init__(self, config: AgentConfig):
        """Build the model, the HTTP session and the tool-equipped agent.

        Args:
            config: Runtime settings (model id, API endpoints, limits).

        Raises:
            ValueError: If the environment variable named by
                ``config.openai_api_key_env`` is unset or empty.
        """
        self.config = config
        api_key = os.getenv(config.openai_api_key_env)
        if not api_key:
            raise ValueError(f"Missing required secret: {config.openai_api_key_env}")

        self.model = OpenAIServerModel(
            model_id=config.openai_model,
            api_base=config.openai_api_base,
            api_key=api_key,
            temperature=0.0,
            max_tokens=1200,
        )

        # One shared session for all attachment downloads.
        self.http = requests.Session()
        self.http.headers.update({"User-Agent": "gaia-smolagent/1.0"})

        self.agent = CodeAgent(
            model=self.model,
            tools=self._build_tools(),
            max_steps=self.config.max_steps,
            add_base_tools=False,
        )

    def _build_tools(self) -> list[Any]:
        """Create the tools handed to the agent.

        Every function is wrapped with smolagents' ``@tool`` decorator so the
        agent receives tool objects rather than bare Python functions. The
        tool docstrings are what the decorator turns into the descriptions
        shown to the model, so they are kept short and in its expected
        ``Args:`` format.
        """
        config = self.config
        http = self.http

        @tool
        def fetch_gaia_file(task_id: str) -> str:
            """
            Fetch and read the file attached to a GAIA task.
            Args:
                task_id: The GAIA task id.
            """
            url = f"{config.api_base_url}/files/{task_id}"
            try:
                response = http.get(url, timeout=config.web_timeout_sec)
                if response.status_code >= 400:
                    return f"TOOL_ERROR: could not fetch file for task {task_id}. HTTP {response.status_code}"
                content_type = (response.headers.get("content-type") or "").lower()
                if "text" in content_type or "json" in content_type or "csv" in content_type:
                    # NOTE(review): collapsing all whitespace also removes line
                    # breaks, which merges CSV rows and flattens indentation.
                    text = re.sub(r"\s+", " ", response.text).strip()
                    if len(text) > config.max_file_chars:
                        text = text[: config.max_file_chars] + " ...[truncated]"
                    return text
                # Non-text payloads are only summarised, never decoded.
                size = len(response.content or b"")
                return f"Binary file fetched. Content-Type: {content_type or 'unknown'}, bytes: {size}"
            except requests.RequestException as e:
                # Report failures as text so the agent can react to them.
                return f"TOOL_ERROR: request failed: {e}"

        @tool
        def add_numbers(a: float, b: float) -> float:
            """
            Add two numbers.
            Args:
                a: First number.
                b: Second number.
            """
            return a + b

        @tool
        def subtract_numbers(a: float, b: float) -> float:
            """
            Subtract two numbers.
            Args:
                a: First number.
                b: Second number.
            """
            return a - b

        @tool
        def multiply_numbers(a: float, b: float) -> float:
            """
            Multiply two numbers.
            Args:
                a: First number.
                b: Second number.
            """
            return a * b

        @tool
        def divide_numbers(a: float, b: float) -> float:
            """
            Divide two numbers.
            Args:
                a: Numerator.
                b: Denominator.
            """
            # Division by zero yields infinity instead of raising.
            if b == 0:
                return float("inf")
            return a / b

        @tool
        def power_number(base: float, exponent: float) -> float:
            """
            Raise a number to a power.
            Args:
                base: Base value.
                exponent: Exponent value.
            """
            return base**exponent

        return [
            fetch_gaia_file,
            add_numbers,
            subtract_numbers,
            multiply_numbers,
            divide_numbers,
            power_number,
            DuckDuckGoSearchTool(),
            VisitWebpageTool(),
        ]

    def solve_task(self, task_id: str, question: str) -> tuple[str, dict[str, Any]]:
        """Run the agent on one question and return its normalised answer.

        Args:
            task_id: GAIA task id; included in the prompt so the agent can
                pass it to ``fetch_gaia_file``.
            question: The question text.

        Returns:
            A ``(answer, meta)`` pair. ``answer`` is the agent output passed
            through ``normalize_answer``; ``meta`` holds the keys
            ``"status"``, ``"steps"`` and ``"tools"``.
        """
        prompt = (
            "You are solving one GAIA benchmark question.\n"
            "You must use tools when needed (duckduckgo search, webpage visit, arithmetic, fetch_gaia_file).\n"
            "Critical scoring rule: exact match. Return only the final answer text, nothing else.\n"
            "Never include labels like 'FINAL ANSWER'.\n\n"
            f"Task ID: {task_id}\n"
            f"Question: {question}\n\n"
            "If the question depends on an attached file, call fetch_gaia_file(task_id) with the exact task id."
        )
        # reset=True is passed on every call, one run per question.
        result = self.agent.run(prompt, reset=True)
        answer = normalize_answer(str(result))
        meta = {
            "status": "ok",
            # Falls back to 0 when the agent exposes no ``logs`` attribute.
            "steps": len(getattr(self.agent, "logs", []) or []),
            "tools": "smolagents",
        }
        return answer, meta
| def _agent_code_url() -> str: | |
| space_id = os.getenv("SPACE_ID") | |
| if space_id: | |
| return f"https://huggingface.co/spaces/{space_id}/tree/main" | |
| return "https://huggingface.co/spaces/unknown/tree/main" | |
def generate_answers(profile: gr.OAuthProfile | None):
    """Run the agent over every question and collect answers for review.

    Args:
        profile: The logged-in user's OAuth profile, or ``None`` when nobody
            is logged in.

    Returns:
        A 4-tuple ``(status, table, answers_payload, username)``: a status
        message, a DataFrame with one row per processed question (``None``
        when processing never started), the list of
        ``{"task_id", "submitted_answer"}`` dicts for submission, and the
        username.
    """
    if not profile:
        return "Please login to Hugging Face first.", None, [], ""

    username = profile.username.strip()
    config = AgentConfig()

    try:
        questions = fetch_questions(config.api_base_url)
    except Exception as e:
        return f"Failed to fetch questions: {e}", None, [], username

    try:
        agent = GAIASmolAgent(config=config)
    except Exception as e:
        return f"Failed to initialize smolagents agent: {e}", None, [], username

    answers_payload: list[dict[str, str]] = []
    rows: list[dict[str, Any]] = []

    for item in questions:
        raw_task_id = item.get("task_id")
        raw_question = item.get("question")
        # Entries without an id or without a question are skipped silently.
        if not raw_task_id or raw_question is None:
            continue

        task_id = str(raw_task_id)
        question_text = str(raw_question)
        row: dict[str, Any] = {"Task ID": task_id, "Question": question_text}
        try:
            answer, meta = agent.solve_task(task_id=task_id, question=question_text)
            answers_payload.append({"task_id": task_id, "submitted_answer": answer})
            row.update(
                {
                    "Submitted Answer": answer,
                    "Status": meta["status"],
                    "Steps": meta["steps"],
                    "Tools": meta["tools"],
                }
            )
        except Exception as e:
            # A failing question is listed in the table but not added to the
            # submission payload.
            row.update(
                {
                    "Submitted Answer": "unknown",
                    "Status": f"agent_error: {e}",
                    "Steps": 0,
                    "Tools": "smolagents",
                }
            )
        rows.append(row)

    table = pd.DataFrame(rows)
    if not answers_payload:
        return "No answers were generated.", table, [], username

    status = (
        f"Generated {len(answers_payload)} answers for user '{username}'. "
        "Review the table, then click submit."
    )
    return status, table, answers_payload, username
def submit_generated_answers(answers_payload: list[dict[str, str]], username: str):
    """Submit previously generated answers and describe the outcome.

    Args:
        answers_payload: Dicts with ``task_id`` and ``submitted_answer`` keys,
            as produced by the generate step.
        username: Hugging Face username stored by the generate step.

    Returns:
        A human-readable status message: the score summary on success, or an
        explanation of what went wrong.
    """
    if not username:
        return "Missing username in session. Click 'Generate Answers' after logging in."
    if not answers_payload:
        return "No generated answers found. Click 'Generate Answers' first."

    # Keep only entries with a non-empty task id; answers are normalised again.
    clean_answers: list[dict[str, str]] = [
        {
            "task_id": task_id,
            "submitted_answer": normalize_answer(str(entry.get("submitted_answer", ""))),
        }
        for entry in answers_payload
        if (task_id := str(entry.get("task_id", "")).strip())
    ]
    if not clean_answers:
        return "Generated answers are invalid or empty."

    payload = {
        "username": username,
        "agent_code": _agent_code_url(),
        "answers": clean_answers,
    }

    try:
        result = submit_answers(DEFAULT_API_URL, payload)
        summary_lines = [
            "Submission Successful!",
            f"User: {result.get('username', username)}",
            f"Overall Score: {result.get('score', 'N/A')}% "
            f"({result.get('correct_count', '?')}/{result.get('total_attempted', '?')} correct)",
            f"Message: {result.get('message', 'No message received.')}",
        ]
        return "\n".join(summary_lines)
    except requests.exceptions.HTTPError as e:
        http_status = f"HTTP {e.response.status_code}"
        try:
            # Prefer the "detail" field of a JSON error body when there is one.
            body = e.response.json()
            detail = f"{http_status} - {body.get('detail', body)}"
        except Exception:
            # Otherwise fall back to the first 500 characters of the raw body.
            detail = f"{http_status} - {e.response.text[:500]}"
        return f"Submission failed: {detail}"
    except Exception as e:
        return f"Submission failed: {e}"
# Gradio UI: answers are generated first, shown in a table for review, and
# only submitted for scoring when the second button is clicked.
with gr.Blocks() as demo:
    gr.Markdown("# GAIA smolagents Runner")
    gr.Markdown(
        """
Two-step flow:
1. Generate answers for all tasks.
2. Submit generated answers to leaderboard scoring.
Required Space secrets:
- `OPENAI_API_KEY`
Optional:
- `OPENAI_MODEL` (default: `gpt-4o-mini`)
- `OPENAI_API_BASE` (default: `https://api.openai.com/v1`)
"""
    )
    gr.LoginButton()
    # State components that carry the generate step's results (answers and
    # username) over to the submit step.
    generated_answers_state = gr.State([])
    username_state = gr.State("")
    with gr.Row():
        generate_button = gr.Button("1) Generate Answers", variant="primary")
        submit_button = gr.Button("2) Submit Generated Answers")
    status_output = gr.Textbox(label="Status", lines=6, interactive=False)
    results_table = gr.DataFrame(label="Generated Answers", wrap=True)
    # No explicit inputs: generate_answers only takes the OAuth profile,
    # presumably injected by Gradio from its type annotation - TODO confirm.
    generate_button.click(
        fn=generate_answers,
        outputs=[status_output, results_table, generated_answers_state, username_state],
    )
    # The submit step reads the stored answers and username, and writes its
    # result message to the status box.
    submit_button.click(
        fn=submit_generated_answers,
        inputs=[generated_answers_state, username_state],
        outputs=[status_output],
    )
if __name__ == "__main__":
    # Print a start-up banner with the Space's environment details, then
    # launch the app.
    banner_title = " App Starting "
    rule = "-" * 30
    print(f"\n{rule}{banner_title}{rule}")
    for env_name in ("SPACE_HOST", "SPACE_ID"):
        print(f"{env_name}: {os.getenv(env_name, 'not set')}")
    print("-" * (60 + len(banner_title)) + "\n")
    demo.launch(debug=True, share=False)