DiogoPinheiro committed on
Commit
4c02ed0
·
1 Parent(s): 81917a3

feat: implement base agent

Browse files
Files changed (5) hide show
  1. .gitignore +4 -0
  2. FinalAssignmentAgent.py +115 -0
  3. app.py +50 -58
  4. test.py +22 -0
  5. tools.py +181 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ venv/
3
+ __pycache__/
4
+ *.pyc
FinalAssignmentAgent.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel, InferenceClientModel
3
+ from tools import WikipediaSearchTool, CoherenceValidatorTool, MultimodalAnalysisTool, FileDownloaderTool
4
+ from huggingface_hub import login
5
+
6
class FinalAssignmentAgent:
    """Router agent for the HF Agents course final assignment.

    Classifies each question as a 'code' or 'general' task with the LLM,
    dispatches it to the matching sub-agent, and validates the answer for
    coherence (with one corrective retry) before returning it.
    """

    def __init__(self):
        # Authenticate with the Hugging Face Hub.
        # Bug fix: login() used to sit in the class body, so it executed at
        # import time; it now runs when the agent is instantiated.
        login()
        # HuggingFace Inference API model; temperature=0 for deterministic routing.
        self.model = InferenceClientModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", temperature=0)
        self.wikipedia_tool = WikipediaSearchTool()
        self.coherence_tool = CoherenceValidatorTool()
        self.file_downloader = FileDownloaderTool()
        self.multimodal_tool = MultimodalAnalysisTool()
        self.validation_cache = {}  # Memoizes (question, answer) -> validation verdict.

        self.code_agent = self.CodeSubAgent(self.model, self)
        self.general_agent = self.GeneralSubAgent(self.model, self)

    def classify_with_llm(self, question: str) -> str:
        """Ask the LLM whether *question* is a 'code' or 'general' task.

        Returns:
            'code' if the model's reply contains the word 'code', else 'general'.
        """
        prompt = (
            "Classify the following task as 'code' or 'general'. "
            "Respond with only one word: 'code' or 'general'.\n"
            f"Task: {question}"
        )
        messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
        response = self.model(messages)
        # Some model wrappers return an object with .content, others a plain string.
        if hasattr(response, 'content'):
            text = response.content.strip().lower()
        else:
            text = str(response).strip().lower()
        print(f"[Router] Decisão da LLM: {text}")
        return "code" if "code" in text else "general"

    def validate_answer(self, question, answer):
        """Audit *answer* against *question*.

        Returns:
            'COHERENCE_CHECK_PASSED', a 'CRITIQUE: ...' string, or the raw
            model reply (cached per (question, answer) pair).
        """
        # Robustness: sub-agents may return non-string results; normalize once.
        answer_text = str(answer)
        # Cheap heuristic before spending an LLM round-trip.
        if len(answer_text) < 20 or "não sei" in answer_text.lower():
            return "CRITIQUE: Resposta curta ou evasiva."
        # Validation cache keyed on the question/answer pair.
        cache_key = f"{question}|||{answer_text}"
        if cache_key in self.validation_cache:
            return self.validation_cache[cache_key]
        # Compact audit prompt.
        audit_prompt = (
            f"Q: {question}\nA: {answer_text}\n"
            "Evaluate if A answers Q directly, without errors or inconsistencies. "
            "Reply only with 'COHERENCE_CHECK_PASSED' or 'CRITIQUE: ...'."
        )
        # Bug fix: the model expects chat-format messages (as used in
        # classify_with_llm), not a bare prompt string.
        messages = [{"role": "user", "content": [{"type": "text", "text": audit_prompt}]}]
        result = self.model(messages)
        self.validation_cache[cache_key] = result
        return result

    def _run_and_validate(self, agent, question, system_prompt=None):
        """Run *agent* on *question*; retry once if validation criticizes it.

        Shared by both sub-agents (their run() bodies were duplicates).
        On a second failure the critique itself is returned so the caller
        can see why the answer was rejected.
        """
        full_query = f"{system_prompt}\n\nTask: {question}" if system_prompt else question
        answer = agent.run(full_query)
        validation = self.validate_answer(question, answer)
        if "COHERENCE_CHECK_PASSED" in str(validation):
            return answer
        # One corrective attempt.
        answer2 = agent.run(full_query)
        validation2 = self.validate_answer(question, answer2)
        return answer2 if "COHERENCE_CHECK_PASSED" in str(validation2) else validation2

    def __call__(self, question: str) -> str:
        """Route *question* to the appropriate sub-agent and return its answer."""
        agent_type = self.classify_with_llm(question)
        # Compact system prompt shared by both sub-agents.
        system_prompt = (
            "You are a multimodal agent specialized in text, images, videos, and code. "
            "Follow this workflow: 1) Identify the task type. 2) Make a short plan. "
            "3) Only validate the plan if it involves multiple steps or tools. "
            "4) Execute. 5) Before responding, validate the final answer. "
            "If the validator returns CRITIQUE, correct and try again (max 2 attempts)."
        )

        if agent_type == "code":
            return self.code_agent.run(question, system_prompt=system_prompt)
        else:
            return self.general_agent.run(question, system_prompt=system_prompt)

    class CodeSubAgent:
        """Sub-agent for code/math tasks; relies on smolagents base tools only."""

        def __init__(self, model, parent):
            self.model = model
            self.parent = parent
            self.agent = CodeAgent(
                tools=[],
                model=model,
                max_steps=2,
                add_base_tools=True,
            )

        def run(self, question, system_prompt=None):
            """Answer *question*, delegating validation/retry to the parent."""
            return self.parent._run_and_validate(self.agent, question, system_prompt)

    class GeneralSubAgent:
        """Sub-agent for general tasks; equipped with search and multimodal tools."""

        def __init__(self, model, parent):
            self.model = model
            self.parent = parent
            self.agent = CodeAgent(
                tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), CoherenceValidatorTool(), MultimodalAnalysisTool(), FileDownloaderTool()],
                add_base_tools=True,
                max_steps=2,
                model=model,
            )

        def run(self, question, system_prompt=None):
            """Answer *question*, delegating validation/retry to the parent."""
            return self.parent._run_and_validate(self.agent, question, system_prompt)
app.py CHANGED
@@ -3,36 +3,26 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
  username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
36
 
37
  api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
@@ -40,7 +30,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -96,52 +86,55 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
 
 
 
99
  # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
- try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
- final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
- )
112
- print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
- except requests.exceptions.HTTPError as e:
116
- error_detail = f"Server responded with status {e.response.status_code}."
117
- try:
118
- error_json = e.response.json()
119
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
- error_detail += f" Response: {e.response.text[:500]}"
122
- status_message = f"Submission Failed: {error_detail}"
123
- print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
- except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
- except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
- except Exception as e:
137
- status_message = f"An unexpected error occurred during submission: {e}"
138
- print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
141
 
142
 
143
  # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
 
145
  gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
@@ -173,9 +166,8 @@ with gr.Blocks() as demo:
173
 
174
  if __name__ == "__main__":
175
  print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
 
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from FinalAssignmentAgent import FinalAssignmentAgent
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
 
 
 
 
 
 
 
 
 
 
 
12
  def run_and_submit_all( profile: gr.OAuthProfile | None):
13
  """
14
+ Fetches all questions, runs the FinalAssignmentAgent on them, submits all answers,
15
  and displays the results.
16
  """
17
  # --- Determine HF Space Runtime URL and Repo URL ---
18
+ space_id = os.getenv("SPACE_ID")
19
 
20
  if profile:
21
  username= f"{profile.username}"
22
  print(f"User logged in: {username}")
23
  else:
24
  print("User not logged in.")
25
+ # return "Please Login to Hugging Face with the button.", None
26
 
27
  api_url = DEFAULT_API_URL
28
  questions_url = f"{api_url}/questions"
 
30
 
31
  # 1. Instantiate Agent ( modify this part to create your agent)
32
  try:
33
+ agent = FinalAssignmentAgent()
34
  except Exception as e:
35
  print(f"Error instantiating agent: {e}")
36
  return f"Error initializing agent: {e}", None
 
86
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
87
  print(status_update)
88
 
89
+ print(answers_payload)
90
+
91
  # 5. Submit
92
+ # print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
93
+ # try:
94
+ # response = requests.post(submit_url, json=submission_data, timeout=60)
95
+ # response.raise_for_status()
96
+ # result_data = response.json()
97
+ # final_status = (
98
+ # f"Submission Successful!\n"
99
+ # f"User: {result_data.get('username')}\n"
100
+ # f"Overall Score: {result_data.get('score', 'N/A')}% "
101
+ # f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
102
+ # f"Message: {result_data.get('message', 'No message received.')}"
103
+ # )
104
+ # print("Submission successful.")
105
+ # results_df = pd.DataFrame(results_log)
106
+ # return final_status, results_df
107
+ # except requests.exceptions.HTTPError as e:
108
+ # error_detail = f"Server responded with status {e.response.status_code}."
109
+ # try:
110
+ # error_json = e.response.json()
111
+ # error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
112
+ # except requests.exceptions.JSONDecodeError:
113
+ # error_detail += f" Response: {e.response.text[:500]}"
114
+ # status_message = f"Submission Failed: {error_detail}"
115
+ # print(status_message)
116
+ # results_df = pd.DataFrame(results_log)
117
+ # return status_message, results_df
118
+ # except requests.exceptions.Timeout:
119
+ # status_message = "Submission Failed: The request timed out."
120
+ # print(status_message)
121
+ # results_df = pd.DataFrame(results_log)
122
+ # return status_message, results_df
123
+ # except requests.exceptions.RequestException as e:
124
+ # status_message = f"Submission Failed: Network error - {e}"
125
+ # print(status_message)
126
+ # results_df = pd.DataFrame(results_log)
127
+ # return status_message, results_df
128
+ # except Exception as e:
129
+ # status_message = f"An unexpected error occurred during submission: {e}"
130
+ # print(status_message)
131
+ # results_df = pd.DataFrame(results_log)
132
+ # return status_message, results_df
133
 
134
 
135
  # --- Build Gradio Interface using Blocks ---
136
  with gr.Blocks() as demo:
137
+
138
  gr.Markdown("# Basic Agent Evaluation Runner")
139
  gr.Markdown(
140
  """
 
166
 
167
  if __name__ == "__main__":
168
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
169
  space_host_startup = os.getenv("SPACE_HOST")
170
+ space_id_startup = os.getenv("SPACE_ID")
171
 
172
  if space_host_startup:
173
  print(f"✅ SPACE_HOST found: {space_host_startup}")
test.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Manual smoke tests for FinalAssignmentAgent (requires live model access)."""
from FinalAssignmentAgent import FinalAssignmentAgent

# Build the agent under test.
agent = FinalAssignmentAgent()

# Test 1: web search and reasoning.
print("--- Teste de Busca e Lógica ---")
question = "Quem foi Alan Turing e qual a sua principal contribuição?"
print(question)
print(f"Resposta: {agent(question)}")

# Test 2: code and math.
print("\n--- Teste de Código ---")
question = "Calcule a raiz quadrada de 144 e multiplique por 5."
print(question)
print(f"Resposta: {agent(question)}")

# Test 3: task-id file retrieval (simulated).
# Note: only works while the Unit 4 scoring server is up.
print("\n--- Teste de Task ID ---")
print(f"Resposta: {agent('What is in the file for task_id 100?')}")
tools.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool, DuckDuckGoSearchTool
2
+ from huggingface_hub import InferenceClient
3
+ import requests
4
+ import os
5
+ import cv2
6
+
7
class WikipediaSearchTool(Tool):
    """Search tool that biases DuckDuckGo results toward English Wikipedia.

    Intended for factual lookups (names, dates, historical events) where
    Wikipedia pages are the most reliable top hits.
    """
    name = "wikipedia_search"
    description = (
        "Use this tool to find factual information, dates, and descriptions from Wikipedia. "
        "It prioritizes official Wikipedia pages to ensure accuracy for names and historical events."
    )
    inputs = {
        "query": {
            "type": "string",
            "description": "The person, place, event, or object to search for.",
        },
        "date_context": {
            "type": "string",
            "description": "Optional: A specific year or date to narrow down the search (e.g., '1969').",
            "nullable": True
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Compose the generic web-search tool rather than re-implementing it.
        self.search_engine = DuckDuckGoSearchTool()

    def forward(self, query: str, date_context: str = None) -> str:
        """Run the Wikipedia-restricted search and return the raw result text.

        Args:
            query: Subject to look up.
            date_context: Optional year/date, appended as an exact phrase.

        Returns:
            Search results, or an explanatory message when nothing is found
            or the search raises.
        """
        # 1. Build a specialized query.
        # 'site:en.wikipedia.org' keeps the top results on Wikipedia.
        refined_query = f"{query} site:en.wikipedia.org"

        if date_context:
            refined_query += f" \"{date_context}\""

        try:
            # 2. Execute the search.
            results = self.search_engine(refined_query)

            if not results or "no results" in results.lower():
                # Bug fix: the original referenced an undefined name
                # ('target_name'), raising NameError on this path; report
                # the actual query instead.
                return f"No specific Wikipedia entry found for '{query}' with the given context."

            return results
        except Exception as e:
            return f"An error occurred while searching Wikipedia: {str(e)}"
49
+
50
class CoherenceValidatorTool(Tool):
    """smolagents Tool that builds a semantic-audit prompt for an answer.

    NOTE(review): forward() returns the audit *prompt* itself, not a
    verdict — the tool holds no model handle, so presumably the calling
    agent's LLM is expected to evaluate the returned prompt and emit
    'COHERENCE_CHECK_PASSED' or 'CRITIQUE: ...'. Confirm this is intended.
    """
    # Tool metadata consumed by smolagents.
    name = "coherence_validator"
    description = "Checks if a plan or answer is semantically coherent and directly addresses the user request. It validates the relationship between names, ages, logic, and code without re-calculating everything."
    inputs = {
        "original_question": {
            "type": "string",
            "description": "The user's original query."
        },
        "proposed_content": {
            "type": "string",
            "description": "The plan, code snippet, or final answer to be audited."
        }
    }
    output_type = "string"

    def forward(self, original_question: str, proposed_content: str) -> str:
        """Build and return the audit prompt for the question/content pair."""
        audit_prompt = f"""
        Role: Senior Semantic Auditor
        Task: Evaluate if the 'Proposed Content' is a coherent and valid response to the 'Original Question'.

        Original Question: {original_question}
        Proposed Content: {proposed_content}

        Evaluation Criteria:
        1. RELEVANCE: Does the content directly address all parts of the question?
        2. ENTITY CONSISTENCY: Do names, ages, and dates remain consistent throughout the text?
        3. CODE/MATH LOGIC: Does the code or mathematical approach 'make sense' for this specific problem (e.g., not calculating temperature when asked for age)?
        4. ABSURDITY CHECK: Are there any hallucinations or impossible claims?

        Instructions:
        - If coherent, return: "COHERENCE_CHECK_PASSED"
        - If NOT coherent, return: "CRITIQUE: [detailed explanation of what doesn't make sense]"
        """
        return audit_prompt
84
+
85
class FileDownloaderTool(Tool):
    """Download the attachment for a task from the Unit 4 scoring server."""
    name = "file_downloader"
    description = "Downloads task-related files (images or videos) from the evaluation server using a task_id."
    inputs = {
        "task_id": {
            "type": "string",
            "description": "The ID of the task to download the file for."
        }
    }
    output_type = "string"

    def forward(self, task_id: str) -> str:
        """Fetch the file for *task_id* and write it to the working directory.

        Returns:
            The local filename on success, or an error string on failure
            (HTTP status != 200, timeout, network error).
        """
        api_url = "https://agents-course-unit4-scoring.hf.space/files"
        try:
            response = requests.get(f"{api_url}/{task_id}", timeout=15)
            if response.status_code == 200:
                # Pick an extension from the Content-Type header.
                # Fix: PNGs used to be saved as .jpg; default stays .jpg
                # for all other (unknown) types, as before.
                ctype = response.headers.get('Content-Type', '')
                if "video" in ctype:
                    ext = ".mp4"
                elif "png" in ctype:
                    ext = ".png"
                else:
                    ext = ".jpg"
                filename = f"file_{task_id}{ext}"

                with open(filename, "wb") as f:
                    f.write(response.content)
                return filename  # Returns the local path for other tools to use
            return f"Error: Server returned status {response.status_code}"
        except Exception as e:
            return f"Download failed: {str(e)}"
112
+
113
class MultimodalAnalysisTool(Tool):
    """Describe images, or summarize videos by captioning one frame every ~2s."""
    name = "multimodal_analyzer"
    description = "Analyzes images and videos. For images, it provides a description. For videos, it extracts frames every 2 seconds to describe the sequence of events."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Local path to the image (.jpg, .png) or video (.mp4) file."
        },
        "query": {
            "type": "string",
            "description": "Optional: Specific question about the image or video content.",
            "nullable": True
        }
    }
    output_type = "string"

    # NOTE: a leftover 'classify_with_llm' method that referenced a nonexistent
    # self.model (and duplicated FinalAssignmentAgent's router) was removed —
    # calling it could only raise AttributeError.

    def forward(self, file_path: str, query: str = None) -> str:
        """Caption *file_path* with the hosted BLIP captioning model.

        Args:
            file_path: Path to a local image or video file.
            query: Currently unused; accepted for interface compatibility.

        Returns:
            A textual description, or an error string on failure.
        """
        client = InferenceClient(model="Salesforce/blip-image-captioning-large")

        if not os.path.exists(file_path):
            return f"Error: File {file_path} not found."

        if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
            return self._describe_video(client, file_path)

        try:
            with open(file_path, "rb") as f:
                image_data = f.read()
            description = client.image_to_text(image_data)
            return f"Image Analysis: {description}"
        except Exception as e:
            return f"Error analyzing image: {str(e)}"

    def _describe_video(self, client, file_path: str) -> str:
        """Caption one frame every ~2 seconds and join the captions."""
        descriptions = []
        video = cv2.VideoCapture(file_path)
        try:
            fps = video.get(cv2.CAP_PROP_FPS)
            if fps == 0:
                fps = 24  # Some containers report 0 FPS; assume a common default.
            # Bug fix: clamp to >= 1 so 'count % frame_interval' cannot divide
            # by zero when the reported fps is below 0.5.
            frame_interval = max(1, int(fps * 2))

            count = 0
            while True:
                success, frame = video.read()
                if not success:
                    break

                if count % frame_interval == 0:
                    temp_frame = f"temp_frame_{count}.jpg"
                    cv2.imwrite(temp_frame, frame)
                    try:
                        with open(temp_frame, "rb") as f:
                            # Caption this specific frame.
                            desc = client.image_to_text(f.read())
                        timestamp = int(count // fps)
                        descriptions.append(f"At {timestamp}s: {desc}")
                    finally:
                        # Bug fix: remove the temp frame even if captioning fails.
                        os.remove(temp_frame)
                count += 1
        finally:
            # Bug fix: release the capture handle on all paths.
            video.release()
        return "Video Content Summary: " + " | ".join(descriptions)
+