DiogoPinheiro committed on
Commit
b419d18
·
1 Parent(s): 437f85b

chore: rollback

Browse files
Files changed (6) hide show
  1. .gitignore +0 -5
  2. FinalAssignmentAgent.py +0 -113
  3. requirements.txt +2 -5
  4. runtime.txt +0 -1
  5. test.py +0 -22
  6. tools.py +0 -181
.gitignore DELETED
@@ -1,5 +0,0 @@
1
- .env
2
- .python-version
3
- venv/
4
- __pycache__/
5
- *.pyc
 
 
 
 
 
 
FinalAssignmentAgent.py DELETED
@@ -1,113 +0,0 @@
1
- import os
2
- from smolagents import CodeAgent, DuckDuckGoSearchTool, InferenceClientModel
3
- from tools import WikipediaSearchTool, CoherenceValidatorTool, MultimodalAnalysisTool, FileDownloaderTool
4
- from huggingface_hub import login
5
-
6
class FinalAssignmentAgent:
    """Router agent: classifies a task as 'code' or 'general' with the LLM and
    delegates it to the matching sub-agent, validating the final answer
    (with one retry) before returning it.
    """

    def __init__(self):
        # Optimized HuggingFace Inference API model (temperature=0 for determinism).
        self.model = InferenceClientModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", temperature=0)
        self.wikipedia_tool = WikipediaSearchTool()
        self.coherence_tool = CoherenceValidatorTool()
        self.file_downloader = FileDownloaderTool()
        self.multimodal_tool = MultimodalAnalysisTool()
        self.validation_cache = {}  # (question, answer) -> validation verdict

        self.code_agent = self.CodeSubAgent(self.model, self)
        self.general_agent = self.GeneralSubAgent(self.model, self)

    @staticmethod
    def _response_text(response) -> str:
        """Normalize a model response (object with .content, or plain value) to str."""
        if hasattr(response, 'content'):
            return response.content
        return str(response)

    def classify_with_llm(self, question: str) -> str:
        """Ask the LLM to route the task; returns 'code' or 'general'."""
        prompt = ("Classify the following task as 'code' or 'general'. "
                  "Respond with only one word: 'code' or 'general'.\n"
                  f"Task: {question}"
                  )
        messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
        response = self.model(messages)
        text = self._response_text(response).strip().lower()
        print(f"[Router] Decisão da LLM: {text}")
        return "code" if "code" in text else "general"

    def validate_answer(self, question, answer):
        """Validate `answer` against `question`.

        Returns the literal 'COHERENCE_CHECK_PASSED' or a string starting with
        'CRITIQUE:'. Verdicts are cached per (question, answer) pair.
        """
        # Cheap heuristic before calling the validator model.
        # `agent.run` may return non-string results, so coerce first.
        answer_text = str(answer)
        if len(answer_text) < 20 or "não sei" in answer_text.lower():
            return "CRITIQUE: Resposta curta ou evasiva."
        # Validation cache: avoid re-auditing identical pairs.
        cache_key = f"{question}|||{answer_text}"
        if cache_key in self.validation_cache:
            return self.validation_cache[cache_key]
        # Lean audit prompt.
        audit_prompt = (
            f"Q: {question}\nA: {answer_text}\n"
            "Evaluate if A answers Q directly, without errors or inconsistencies. "
            "Reply only with 'COHERENCE_CHECK_PASSED' or 'CRITIQUE: ...'."
        )
        # BUG FIX: the model must be called with the chat-messages format
        # (as in classify_with_llm), not a bare string.
        messages = [{"role": "user", "content": [{"type": "text", "text": audit_prompt}]}]
        result = self._response_text(self.model(messages))
        self.validation_cache[cache_key] = result
        return result

    def _run_with_validation(self, agent, question, system_prompt=None):
        """Shared sub-agent loop: run, validate, retry once on CRITIQUE.

        Previously duplicated verbatim in both sub-agent classes.
        """
        full_query = f"{system_prompt}\n\nTask: {question}" if system_prompt else question
        answer = agent.run(full_query)
        validation = self.validate_answer(question, answer)
        if "COHERENCE_CHECK_PASSED" in str(validation):
            return answer
        # Try to correct once.
        answer2 = agent.run(full_query)
        validation2 = self.validate_answer(question, answer2)
        return answer2 if "COHERENCE_CHECK_PASSED" in str(validation2) else validation2

    def __call__(self, question: str) -> str:
        agent_type = self.classify_with_llm(question)
        # Lean system prompt shared by both sub-agents.
        system_prompt = (
            "You are a multimodal agent specialized in text, images, videos, and code. "
            "Follow this workflow: 1) Identify the task type. 2) Make a short plan. "
            "3) Only validate the plan if it involves multiple steps or tools. "
            "4) Execute. 5) Before responding, validate the final answer. "
            "If the validator returns CRITIQUE, correct and try again (max 2 attempts)."
        )

        if agent_type == "code":
            return self.code_agent.run(question, system_prompt=system_prompt)
        else:
            return self.general_agent.run(question, system_prompt=system_prompt)

    class CodeSubAgent:
        """Sub-agent for code/math tasks; uses only the CodeAgent base tools."""

        def __init__(self, model, parent):
            self.model = model
            self.parent = parent
            self.agent = CodeAgent(
                tools=[],
                model=model,
                max_steps=2,
                add_base_tools=True,
            )

        def run(self, question, system_prompt=None):
            return self.parent._run_with_validation(self.agent, question, system_prompt)

    class GeneralSubAgent:
        """Sub-agent for general tasks, equipped with search and multimodal tools."""

        def __init__(self, model, parent):
            self.model = model
            self.parent = parent
            self.agent = CodeAgent(
                tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), CoherenceValidatorTool(), MultimodalAnalysisTool(), FileDownloaderTool()],
                add_base_tools=True,
                max_steps=2,
                model=model,
            )

        def run(self, question, system_prompt=None):
            return self.parent._run_with_validation(self.agent, question, system_prompt)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -1,5 +1,2 @@
1
- gradio[oauth]
2
- requests
3
- smolagents
4
- opencv-python
5
- huggingface_hub
 
1
+ gradio
2
+ requests
 
 
 
runtime.txt DELETED
@@ -1 +0,0 @@
1
- python-3.10
 
 
test.py DELETED
@@ -1,22 +0,0 @@
1
from FinalAssignmentAgent import FinalAssignmentAgent

# Initialize the agent under test.
agent = FinalAssignmentAgent()

# Each entry: (section header, question, whether to echo the question first).
# NOTE: the task_id case only works while the Unit 4 scoring server is up.
_CASES = [
    ("--- Teste de Busca e Lógica ---",
     "Quem foi Alan Turing e qual a sua principal contribuição?", True),
    ("\n--- Teste de Código ---",
     "Calcule a raiz quadrada de 144 e multiplique por 5.", True),
    ("\n--- Teste de Task ID ---",
     "What is in the file for task_id 100?", False),
]

for header, question, echo_question in _CASES:
    print(header)
    if echo_question:
        print(question)
    response = agent(question)
    print(f"Resposta: {response}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tools.py DELETED
@@ -1,181 +0,0 @@
1
- from smolagents import Tool, DuckDuckGoSearchTool
2
- from huggingface_hub import InferenceClient
3
- import requests
4
- import os
5
- import cv2
6
-
7
class WikipediaSearchTool(Tool):
    """Search tool that biases a DuckDuckGo query toward English Wikipedia."""

    name = "wikipedia_search"
    description = (
        "Use this tool to find factual information, dates, and descriptions from Wikipedia. "
        "It prioritizes official Wikipedia pages to ensure accuracy for names and historical events."
    )
    inputs = {
        "query": {
            "type": "string",
            "description": "The person, place, event, or object to search for.",
        },
        "date_context": {
            "type": "string",
            "description": "Optional: A specific year or date to narrow down the search (e.g., '1969').",
            "nullable": True
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Initialize the base search engine as a component
        self.search_engine = DuckDuckGoSearchTool()

    def forward(self, query: str, date_context: str = None) -> str:
        """Search Wikipedia for `query`, optionally narrowed by `date_context`.

        Returns the raw search results, a not-found message, or an error string.
        """
        # 1. Build a specialized query.
        # Adding 'site:en.wikipedia.org' ensures the top results are from Wikipedia.
        refined_query = f"{query} site:en.wikipedia.org"

        if date_context:
            refined_query += f" \"{date_context}\""

        try:
            # 2. Execute the search.
            results = self.search_engine(refined_query)

            if not results or "no results" in results.lower():
                # BUG FIX: the original referenced an undefined name `target_name`
                # here, raising NameError (swallowed by the except below, which
                # masked the intended not-found message). Use `query`.
                return f"No specific Wikipedia entry found for '{query}' with the given context."

            return results
        except Exception as e:
            return f"An error occurred while searching Wikipedia: {str(e)}"
49
-
50
class CoherenceValidatorTool(Tool):
    """Builds a semantic-audit prompt for a (question, proposed content) pair."""

    name = "coherence_validator"
    description = "Checks if a plan or answer is semantically coherent and directly addresses the user request. It validates the relationship between names, ages, logic, and code without re-calculating everything."
    inputs = {
        "original_question": {
            "type": "string",
            "description": "The user's original query."
        },
        "proposed_content": {
            "type": "string",
            "description": "The plan, code snippet, or final answer to be audited."
        }
    }
    output_type = "string"

    def forward(self, original_question: str, proposed_content: str) -> str:
        """Return an audit prompt embedding the question and the content.

        NOTE(review): this returns the audit PROMPT itself, not a verdict —
        presumably the calling agent's LLM reads the tool output and performs
        the evaluation. Confirm this is intentional and not a missing model call.
        """
        audit_prompt = f"""
        Role: Senior Semantic Auditor
        Task: Evaluate if the 'Proposed Content' is a coherent and valid response to the 'Original Question'.

        Original Question: {original_question}
        Proposed Content: {proposed_content}

        Evaluation Criteria:
        1. RELEVANCE: Does the content directly address all parts of the question?
        2. ENTITY CONSISTENCY: Do names, ages, and dates remain consistent throughout the text?
        3. CODE/MATH LOGIC: Does the code or mathematical approach 'make sense' for this specific problem (e.g., not calculating temperature when asked for age)?
        4. ABSURDITY CHECK: Are there any hallucinations or impossible claims?

        Instructions:
        - If coherent, return: "COHERENCE_CHECK_PASSED"
        - If NOT coherent, return: "CRITIQUE: [detailed explanation of what doesn't make sense]"
        """
        return audit_prompt
84
-
85
class FileDownloaderTool(Tool):
    """Fetches the file attached to a task from the scoring server and saves it locally."""

    name = "file_downloader"
    description = "Downloads task-related files (images or videos) from the evaluation server using a task_id."
    inputs = {
        "task_id": {
            "type": "string",
            "description": "The ID of the task to download the file for."
        }
    }
    output_type = "string"

    def forward(self, task_id: str) -> str:
        """Download the file for `task_id`; return the local path or an error string."""
        api_url = "https://agents-course-unit4-scoring.hf.space/files"
        try:
            resp = requests.get(f"{api_url}/{task_id}", timeout=15)
            if resp.status_code != 200:
                return f"Error: Server returned status {resp.status_code}"

            # Pick the extension from the reported content type.
            content_type = resp.headers.get('Content-Type', '')
            extension = ".mp4" if "video" in content_type else ".jpg"
            local_path = f"file_{task_id}{extension}"

            with open(local_path, "wb") as out:
                out.write(resp.content)
            # Return the local path so other tools can consume the file.
            return local_path
        except Exception as e:
            return f"Download failed: {str(e)}"
112
-
113
class MultimodalAnalysisTool(Tool):
    """Describes images and videos via a hosted BLIP image-captioning model."""

    name = "multimodal_analyzer"
    description = "Analyzes images and videos. For images, it provides a description. For videos, it extracts frames every 2 seconds to describe the sequence of events."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Local path to the image (.jpg, .png) or video (.mp4) file."
        },
        "query": {
            "type": "string",
            "description": "Optional: Specific question about the image or video content.",
            "nullable": True
        }
    }
    output_type = "string"

    def forward(self, file_path: str, query: str = None) -> str:
        """Caption an image, or caption frames sampled every ~2s from a video.

        NOTE(review): `query` is accepted but never used below — confirm whether
        it should be forwarded to the captioning model.
        """
        client = InferenceClient(model="Salesforce/blip-image-captioning-large")

        if not os.path.exists(file_path):
            return f"Error: File {file_path} not found."

        if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
            # Video path: sample one frame every ~2 seconds and caption each.
            descriptions = []
            video = cv2.VideoCapture(file_path)
            fps = video.get(cv2.CAP_PROP_FPS)
            if fps == 0: fps = 24  # fall back when the container reports no FPS
            frame_interval = int(fps * 2)  # frames between samples (~2 seconds)

            count = 0
            while True:
                success, frame = video.read()
                if not success:
                    break

                if count % frame_interval == 0:
                    # Write the frame to a temp file so it can be sent as bytes.
                    temp_frame = f"temp_frame_{count}.jpg"
                    cv2.imwrite(temp_frame, frame)
                    with open(temp_frame, "rb") as f:
                        # Analyze the specific frame
                        desc = client.image_to_text(f.read())
                    # NOTE(review): fps is a float, so this timestamp renders as
                    # e.g. "2.0" rather than "2" — confirm that is acceptable.
                    timestamp = count // fps
                    descriptions.append(f"At {timestamp}s: {desc}")
                    os.remove(temp_frame)
                count += 1

            video.release()
            return "Video Content Summary: " + " | ".join(descriptions)
        else:
            # Image path: caption the whole file.
            try:
                with open(file_path, "rb") as f:
                    image_data = f.read()
                    description = client.image_to_text(image_data)
                    return f"Image Analysis: {description}"
            except Exception as e:
                return f"Error analyzing image: {str(e)}"

    def classify_with_llm(self, question: str) -> str:
        # NOTE(review): this method looks like a copy-paste leftover from the
        # router agent — `self.model` is never set on this class, so any call
        # raises AttributeError. Candidate for removal.
        prompt = (
            "Classify the following task as 'code' or 'general'. "
            "Respond with only one word: 'code' or 'general'.\n"
            f"Task: {question}"
        )
        response = self.model(prompt)
        if "code" in response.lower():
            return "code"
        else:
            return "general"
181
-