Macmill committed on
Commit
f1ec76c
·
verified ·
1 Parent(s): e918e46

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -30
app.py CHANGED
@@ -15,7 +15,6 @@ AGENT_LOAD_ERROR = ""
15
  AGENT_FUNCTION_NAME = "run_gaia_task" # Define the target function name
16
 
17
  try:
18
- # --- MODIFIED: Import the correct function ---
19
  from final_agent import run_gaia_task
20
  print(f"Successfully imported {AGENT_FUNCTION_NAME} from final_agent.py")
21
  AGENT_AVAILABLE = True
@@ -24,15 +23,12 @@ except ImportError as e:
24
  print(error_msg)
25
  AGENT_LOAD_ERROR = error_msg
26
  except Exception as e:
27
- # Catch errors during the global setup within final_agent.py
28
  error_msg = f"ERROR during import or initial setup in final_agent.py: {e}"
29
  print(error_msg)
30
  traceback.print_exc()
31
  AGENT_LOAD_ERROR = error_msg
32
 
33
- # --- MODIFIED: Define a dummy function matching the new signature ---
34
  if not AGENT_AVAILABLE:
35
- # This dummy function will be used if the import fails
36
  def run_gaia_task(task_description: str) -> str:
37
  """Dummy function used when the real agent fails to load."""
38
  print(f"Executing dummy {AGENT_FUNCTION_NAME} because agent failed to load.")
@@ -44,32 +40,21 @@ class AgentRunner:
44
  print("AgentRunner initialized.")
45
  if not AGENT_AVAILABLE:
46
  print(f"WARNING: Agent function failed to load during startup. Error: {AGENT_LOAD_ERROR}")
47
- # Optional: Add environment variable checks if needed
48
- # if not os.getenv("GROQ_API_KEY") or not os.getenv("TAVILY_API_KEY"):
49
- # print("WARNING: Required API keys might not be set in Space secrets.")
50
 
51
  def __call__(self, question: str) -> str:
52
  """Runs the imported agent function on a single question."""
53
  print(f"\n--- AgentRunner received question: {question[:100]}... ---")
54
- # Always call the potentially dummy function; it returns error if needed
55
  try:
56
- # --- MODIFIED: Call the new agent function ---
57
- # The 'question' variable holds the task description.
58
- # The new agent handles file paths internally based on the description.
59
  final_answer = run_gaia_task(task_description=question)
60
-
61
- # Ensure result is always a string for submission
62
  final_answer_str = str(final_answer)
63
  print(f"--- AgentRunner returning answer: {final_answer_str} ---")
64
  return final_answer_str
65
  except Exception as e:
66
- # Catch unexpected errors during the function call itself
67
  print(f"!!! ERROR calling {AGENT_FUNCTION_NAME} function: {e} !!!")
68
- traceback.print_exc() # Log the full error to Space logs
69
- # --- MODIFIED: Update error message ---
70
  return f"ERROR: Agent function '{AGENT_FUNCTION_NAME}' failed during execution - {e}"
71
 
72
- # --- Submission Logic (Largely Unchanged) ---
73
  def run_and_submit_all( profile: gr.OAuthProfile | None):
74
  """Fetches questions, runs agent, submits answers."""
75
  space_id = os.getenv("SPACE_ID")
@@ -82,9 +67,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
82
  # 1. Instantiate Agent Runner
83
  try:
84
  agent = AgentRunner()
85
- # Check if agent loaded correctly before proceeding
86
  if not AGENT_AVAILABLE:
87
- # --- MODIFIED: Update error message ---
88
  return f"Agent function '{AGENT_FUNCTION_NAME}' failed to load. Check logs. Error: {AGENT_LOAD_ERROR}", None
89
  except Exception as e: print(f"Error instantiating AgentRunner: {e}"); return f"Init error: {e}", None
90
 
@@ -106,18 +89,16 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
106
  question_count = len(questions_data)
107
  for i, item in enumerate(questions_data):
108
  task_id = item.get("task_id"); question_text = item.get("question")
109
- print(f"\n--- Processing Question {i+1}/{question_count} (ID: {task_id}) ---") # Add progress logging
110
  if not task_id or question_text is None: print(f"Skipping item: {item}"); continue
111
  try:
112
- # Calls AgentRunner.__call__, which now calls run_gaia_task
113
  submitted_answer = agent(question_text)
114
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
115
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
116
  except Exception as e:
117
- # Catch errors during the agent's execution on a specific task
118
  print(f"!! Error running agent on task {task_id}: {e} !!"); traceback.print_exc()
119
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT RUN ERROR: {e}"})
120
- answers_payload.append({"task_id": task_id, "submitted_answer": f"AGENT RUN ERROR: {e}"}) # Submit error
121
 
122
  if not answers_payload: print("Agent produced no answers."); return "Agent produced no answers.", pd.DataFrame(results_log)
123
 
@@ -127,7 +108,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
127
 
128
  # 5. Submit
129
  try:
130
- response = requests.post(submit_url, json=submission_data, timeout=120); response.raise_for_status() # Increased timeout
131
  result_data = response.json()
132
  final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
133
  f"Overall Score: {result_data.get('score', 'N/A')}% "
@@ -145,7 +126,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
145
  print(status_message); results_df = pd.DataFrame(results_log); return status_message, results_df
146
 
147
 
148
- # --- Build Gradio Interface (Unchanged) ---
149
  with gr.Blocks() as demo:
150
  gr.Markdown("# GAIA Agent Evaluation Runner")
151
  gr.Markdown(
@@ -158,19 +139,20 @@ with gr.Blocks() as demo:
158
  **Disclaimers:** Execution can take significant time depending on the number of questions and agent complexity.
159
  """
160
  )
161
- gr.LoginButton()
162
  run_button = gr.Button("Run Evaluation & Submit All Answers")
163
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
164
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
165
- run_button.click(fn=run_and_submit_all, inputs=gr.State(None), outputs=[status_output, results_table]) # Pass None for profile initially
166
 
167
- # --- Main execution block (Unchanged) ---
 
 
 
 
168
  if __name__ == "__main__":
169
  print("\n" + "-"*30 + " App Starting " + "-"*30)
170
- # Perform startup checks if needed (e.g., check essential env vars)
171
  if not AGENT_AVAILABLE:
172
  print(f"CRITICAL WARNING: Agent function '{AGENT_FUNCTION_NAME}' could not be loaded. The app will run but agent calls will fail.")
173
  print(f"Load Error Details: {AGENT_LOAD_ERROR}")
174
  print("Launching Gradio Interface...")
175
- # Consider removing debug=True for "production" submission space
176
  demo.launch(debug=False, share=False)
 
15
  AGENT_FUNCTION_NAME = "run_gaia_task" # Define the target function name
16
 
17
  try:
 
18
  from final_agent import run_gaia_task
19
  print(f"Successfully imported {AGENT_FUNCTION_NAME} from final_agent.py")
20
  AGENT_AVAILABLE = True
 
23
  print(error_msg)
24
  AGENT_LOAD_ERROR = error_msg
25
  except Exception as e:
 
26
  error_msg = f"ERROR during import or initial setup in final_agent.py: {e}"
27
  print(error_msg)
28
  traceback.print_exc()
29
  AGENT_LOAD_ERROR = error_msg
30
 
 
31
  if not AGENT_AVAILABLE:
 
32
  def run_gaia_task(task_description: str) -> str:
33
  """Dummy function used when the real agent fails to load."""
34
  print(f"Executing dummy {AGENT_FUNCTION_NAME} because agent failed to load.")
 
40
  print("AgentRunner initialized.")
41
  if not AGENT_AVAILABLE:
42
  print(f"WARNING: Agent function failed to load during startup. Error: {AGENT_LOAD_ERROR}")
 
 
 
43
 
44
  def __call__(self, question: str) -> str:
45
  """Runs the imported agent function on a single question."""
46
  print(f"\n--- AgentRunner received question: {question[:100]}... ---")
 
47
  try:
 
 
 
48
  final_answer = run_gaia_task(task_description=question)
 
 
49
  final_answer_str = str(final_answer)
50
  print(f"--- AgentRunner returning answer: {final_answer_str} ---")
51
  return final_answer_str
52
  except Exception as e:
 
53
  print(f"!!! ERROR calling {AGENT_FUNCTION_NAME} function: {e} !!!")
54
+ traceback.print_exc()
 
55
  return f"ERROR: Agent function '{AGENT_FUNCTION_NAME}' failed during execution - {e}"
56
 
57
+ # --- Submission Logic ---
58
  def run_and_submit_all( profile: gr.OAuthProfile | None):
59
  """Fetches questions, runs agent, submits answers."""
60
  space_id = os.getenv("SPACE_ID")
 
67
  # 1. Instantiate Agent Runner
68
  try:
69
  agent = AgentRunner()
 
70
  if not AGENT_AVAILABLE:
 
71
  return f"Agent function '{AGENT_FUNCTION_NAME}' failed to load. Check logs. Error: {AGENT_LOAD_ERROR}", None
72
  except Exception as e: print(f"Error instantiating AgentRunner: {e}"); return f"Init error: {e}", None
73
 
 
89
  question_count = len(questions_data)
90
  for i, item in enumerate(questions_data):
91
  task_id = item.get("task_id"); question_text = item.get("question")
92
+ print(f"\n--- Processing Question {i+1}/{question_count} (ID: {task_id}) ---")
93
  if not task_id or question_text is None: print(f"Skipping item: {item}"); continue
94
  try:
 
95
  submitted_answer = agent(question_text)
96
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
97
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
98
  except Exception as e:
 
99
  print(f"!! Error running agent on task {task_id}: {e} !!"); traceback.print_exc()
100
  results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT RUN ERROR: {e}"})
101
+ answers_payload.append({"task_id": task_id, "submitted_answer": f"AGENT RUN ERROR: {e}"})
102
 
103
  if not answers_payload: print("Agent produced no answers."); return "Agent produced no answers.", pd.DataFrame(results_log)
104
 
 
108
 
109
  # 5. Submit
110
  try:
111
+ response = requests.post(submit_url, json=submission_data, timeout=120); response.raise_for_status()
112
  result_data = response.json()
113
  final_status = (f"Submission Successful!\nUser: {result_data.get('username')}\n"
114
  f"Overall Score: {result_data.get('score', 'N/A')}% "
 
126
  print(status_message); results_df = pd.DataFrame(results_log); return status_message, results_df
127
 
128
 
129
+ # --- Build Gradio Interface ---
130
  with gr.Blocks() as demo:
131
  gr.Markdown("# GAIA Agent Evaluation Runner")
132
  gr.Markdown(
 
139
  **Disclaimers:** Execution can take significant time depending on the number of questions and agent complexity.
140
  """
141
  )
142
+ login_button = gr.LoginButton() # Renders the Hugging Face login button; the variable itself is not referenced by the click handler
143
  run_button = gr.Button("Run Evaluation & Submit All Answers")
144
  status_output = gr.Textbox(label="Run Status / Submission Result", lines=5, interactive=False)
145
  results_table = gr.DataFrame(label="Questions and Agent Answers", wrap=True)
 
146
 
147
+ # --- CORRECTED LINE ---
148
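+ # No 'inputs' argument here: Gradio injects the logged-in user's profile automatically because run_and_submit_all annotates its parameter as gr.OAuthProfile | None (None when nobody is logged in).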
149
+ run_button.click(fn=run_and_submit_all, outputs=[status_output, results_table])
150
+
151
+ # --- Main execution block ---
152
  if __name__ == "__main__":
153
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
154
  if not AGENT_AVAILABLE:
155
  print(f"CRITICAL WARNING: Agent function '{AGENT_FUNCTION_NAME}' could not be loaded. The app will run but agent calls will fail.")
156
  print(f"Load Error Details: {AGENT_LOAD_ERROR}")
157
  print("Launching Gradio Interface...")
 
158
  demo.launch(debug=False, share=False)