File size: 2,207 Bytes
3df5153
 
3d1b0c5
3df5153
 
6a1d4dd
 
3df5153
33cdb30
3d1b0c5
 
94b05f0
3df5153
3d1b0c5
 
 
3df5153
0617575
 
 
6d03c10
 
0617575
6d03c10
 
 
3d1b0c5
d2fa4ad
6d03c10
0617575
6d03c10
0617575
6d03c10
 
 
d2fa4ad
6d03c10
3d1b0c5
6d03c10
d2fa4ad
290dcc5
d2fa4ad
 
0617575
 
6d03c10
0617575
6d03c10
3d1b0c5
 
 
d2fa4ad
 
 
 
 
3d1b0c5
 
 
6d03c10
3d1b0c5
6d03c10
 
0617575
3d1b0c5
d6ca7ae
d2fa4ad
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
import json
import os
import requests

from agent import solve_task, load_tasks
from mistral_hf_wrapper import MistralInference

API_URL = os.getenv("HF_MISTRAL_ENDPOINT")
API_TOKEN = os.getenv("HF_TOKEN")
USERNAME = os.getenv("HF_USERNAME")
CODE_LINK = os.getenv("HF_CODE_LINK")

def run_and_submit_all():
    """Solve every GAIA task with the Mistral endpoint and submit all answers.

    Returns:
        A human-readable string for the Gradio textbox: the pretty-printed
        scoring-API response on success, or an error description on failure.
    """
    model = MistralInference(api_url=API_URL, api_token=API_TOKEN)
    tasks = load_tasks()

    if not tasks:
        return "No tasks loaded from metadata.jsonl. Make sure the file exists and is valid."

    print(f"[INFO] Loaded {len(tasks)} tasks from metadata.jsonl")

    answers = []
    for i, task in enumerate(tasks):
        print(f"[INFO] Solving task {i+1}/{len(tasks)}: {task.get('question_id', 'N/A')}")

        try:
            result = solve_task(task, model)

            # Guard against blank model output so the submission payload
            # never carries an empty answer for a task.
            if not result.get("submitted_answer"):
                print("[WARN] Empty model response detected")
                result["submitted_answer"] = "ERROR: Empty model response"
            else:
                print(f"Answer: {result['submitted_answer'][:100]}...")

            answers.append(result)

        except Exception as e:
            # One failing task must not abort the whole run; record the
            # failure so the submission stays aligned with the task list.
            print(f"[ERROR] Task failed: {e}")
            answers.append({
                "question_id": task.get("question_id", "UNKNOWN"),
                "submitted_answer": f"ERROR: {str(e)}"
            })

    if not answers:
        return "No answers generated. Check model response."

    print("[INFO] Submitting answers to GAIA benchmark API...")
    try:
        res = requests.post(
            "https://agents-course-unit4-scoring.hf.space/submit",
            headers={"Content-Type": "application/json"},
            json={
                "username": USERNAME,
                "agent_code": CODE_LINK,
                "answers": answers
            },
            # Without a timeout, requests can block indefinitely and freeze
            # the Gradio callback if the scoring endpoint stalls.
            timeout=120,
        )
    except requests.RequestException as e:
        # Surface connection/timeout errors as the status string this
        # function is documented to return, instead of raising into Gradio.
        print(f"[ERROR] Submission request failed: {e}")
        return f"Error submitting: {e}"

    if res.ok:
        print(" Submission successful.")
        try:
            return json.dumps(res.json(), indent=2)
        except ValueError:
            # A 2xx response with a non-JSON body: show the raw text
            # rather than crashing on the decode error.
            return res.text

    print(f"[ERROR] Submission failed: {res.status_code} - {res.text}")
    return f"Error submitting: {res.status_code} - {res.text}"

# Wire the submission routine into a one-button Gradio UI and serve it.
demo = gr.Interface(
    fn=run_and_submit_all,
    inputs=[],
    outputs="textbox",
    title="GAIA Benchmark Agent Submission",
)

demo.launch()