import gradio as gr
import json
import os
import requests
from agent import solve_task, load_tasks
from mistral_hf_wrapper import MistralInference
# Runtime configuration, read from the environment once at import time.
# Each value is None when the corresponding variable is not set.
API_URL = os.environ.get("HF_MISTRAL_ENDPOINT")  # passed to MistralInference as api_url
API_TOKEN = os.environ.get("HF_TOKEN")  # passed to MistralInference as api_token
USERNAME = os.environ.get("HF_USERNAME")  # sent as "username" in the submission payload
CODE_LINK = os.environ.get("HF_CODE_LINK")  # sent as "agent_code" in the submission payload
# Endpoint that receives the answers for scoring.
_SUBMIT_URL = "https://agents-course-unit4-scoring.hf.space/submit"
# Upper bound, in seconds, on the submission request so that the UI callback
# cannot block forever on an unresponsive server.
_SUBMIT_TIMEOUT_SECONDS = 60


def run_and_submit_all():
    """Solve every loaded task and submit the answers for scoring.

    Builds a MistralInference model from API_URL / API_TOKEN, runs
    solve_task on each task returned by load_tasks(), and POSTs the
    collected answers together with USERNAME and CODE_LINK to the
    scoring endpoint.

    A task whose solver raises, or whose result has an empty
    "submitted_answer", is still included in the submission with an
    "ERROR: ..." placeholder answer, so one failing task does not abort
    the run.

    Returns:
        str: The pretty-printed JSON response on success (or the raw
        response text if the body is not valid JSON), otherwise a
        human-readable message describing why nothing was submitted or
        why the submission failed.
    """
    model = MistralInference(api_url=API_URL, api_token=API_TOKEN)
    tasks = load_tasks()
    if not tasks:
        return "No tasks loaded from metadata.jsonl. Make sure the file exists and is valid."

    total = len(tasks)
    print(f"[INFO] Loaded {total} tasks from metadata.jsonl")

    answers = []
    for position, task in enumerate(tasks, start=1):
        print(f"[INFO] Solving task {position}/{total}: {task.get('question_id', 'N/A')}")
        try:
            result = solve_task(task, model)
            answer = result.get("submitted_answer")
            if not answer:  # Check empty responses
                print("[WARN] Empty model response detected")
                result["submitted_answer"] = "ERROR: Empty model response"
            else:
                # str() keeps the preview from raising on a non-string answer
                # (e.g. a number), which would otherwise land in the except
                # branch and replace a valid answer with an error entry.
                print(f"Answer: {str(answer)[:100]}...")
            answers.append(result)
        except Exception as e:
            # Deliberately broad: any failure for one task is recorded as
            # that task's answer and the remaining tasks still run.
            print(f"[ERROR] Task failed: {e}")
            answers.append({
                "question_id": task.get("question_id", "UNKNOWN"),
                "submitted_answer": f"ERROR: {str(e)}",
            })

    if not answers:
        return "No answers generated. Check model response."

    print("[INFO] Submitting answers to GAIA benchmark API...")
    try:
        res = requests.post(
            _SUBMIT_URL,
            headers={"Content-Type": "application/json"},
            json={
                "username": USERNAME,
                "agent_code": CODE_LINK,
                "answers": answers,
            },
            timeout=_SUBMIT_TIMEOUT_SECONDS,
        )
    except requests.RequestException as e:
        # Connection errors and timeouts are reported in the UI instead of
        # propagating out of the callback.
        print(f"[ERROR] Submission failed: {e}")
        return f"Error submitting: {e}"

    if res.ok:
        print(" Submission successful.")
        try:
            return json.dumps(res.json(), indent=2)
        except ValueError:
            # A successful status with a non-JSON body: show it verbatim.
            return res.text

    print(f"[ERROR] Submission failed: {res.status_code} - {res.text}")
    return f"Error submitting: {res.status_code} - {res.text}"
# Gradio interface: takes no inputs and shows the string returned by
# run_and_submit_all in a textbox. Launched when this module is executed.
gr.Interface(
    fn=run_and_submit_all,
    inputs=[],
    outputs="textbox",
    title="GAIA Benchmark Agent Submission",
).launch()