DiogoPinheiro committed on
Commit
4c02ed0
·
1 Parent(s): 81917a3

feat: implement base agent

Browse files
Files changed (5) hide show
  1. .gitignore +4 -0
  2. FinalAssignmentAgent.py +115 -0
  3. app.py +50 -58
  4. test.py +22 -0
  5. tools.py +181 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ .env
2
+ venv/
3
+ __pycache__/
4
+ *.pyc
FinalAssignmentAgent.py ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from smolagents import CodeAgent, DuckDuckGoSearchTool, LiteLLMModel, InferenceClientModel
3
+ from tools import WikipediaSearchTool, CoherenceValidatorTool, MultimodalAnalysisTool, FileDownloaderTool
4
+ from huggingface_hub import login
5
+
6
class FinalAssignmentAgent:
    """Router agent for the HF Agents course final assignment.

    Classifies each question as a 'code' or 'general' task with the LLM,
    dispatches it to the matching sub-agent, and validates the answer for
    coherence (with one corrective retry) before returning it.
    """

    def __init__(self):
        # Authenticate with the Hugging Face Hub.
        # Bug fix: login() used to sit in the class body, so it executed at
        # import time; it now runs when the agent is instantiated.
        login()
        # HuggingFace Inference API model; temperature=0 for deterministic routing.
        self.model = InferenceClientModel(model="Qwen/Qwen2.5-Coder-32B-Instruct", temperature=0)
        self.wikipedia_tool = WikipediaSearchTool()
        self.coherence_tool = CoherenceValidatorTool()
        self.file_downloader = FileDownloaderTool()
        self.multimodal_tool = MultimodalAnalysisTool()
        self.validation_cache = {}  # Memoizes (question, answer) -> validation verdict.

        self.code_agent = self.CodeSubAgent(self.model, self)
        self.general_agent = self.GeneralSubAgent(self.model, self)

    def classify_with_llm(self, question: str) -> str:
        """Ask the LLM whether *question* is a 'code' or 'general' task.

        Returns:
            'code' if the model's reply contains the word 'code', else 'general'.
        """
        prompt = (
            "Classify the following task as 'code' or 'general'. "
            "Respond with only one word: 'code' or 'general'.\n"
            f"Task: {question}"
        )
        messages = [{"role": "user", "content": [{"type": "text", "text": prompt}]}]
        response = self.model(messages)
        # Some model wrappers return an object with .content, others a plain string.
        if hasattr(response, 'content'):
            text = response.content.strip().lower()
        else:
            text = str(response).strip().lower()
        print(f"[Router] Decisão da LLM: {text}")
        return "code" if "code" in text else "general"

    def validate_answer(self, question, answer):
        """Audit *answer* against *question*.

        Returns:
            'COHERENCE_CHECK_PASSED', a 'CRITIQUE: ...' string, or the raw
            model reply (cached per (question, answer) pair).
        """
        # Robustness: sub-agents may return non-string results; normalize once.
        answer_text = str(answer)
        # Cheap heuristic before spending an LLM round-trip.
        if len(answer_text) < 20 or "não sei" in answer_text.lower():
            return "CRITIQUE: Resposta curta ou evasiva."
        # Validation cache keyed on the question/answer pair.
        cache_key = f"{question}|||{answer_text}"
        if cache_key in self.validation_cache:
            return self.validation_cache[cache_key]
        # Compact audit prompt.
        audit_prompt = (
            f"Q: {question}\nA: {answer_text}\n"
            "Evaluate if A answers Q directly, without errors or inconsistencies. "
            "Reply only with 'COHERENCE_CHECK_PASSED' or 'CRITIQUE: ...'."
        )
        # Bug fix: the model expects chat-format messages (as used in
        # classify_with_llm), not a bare prompt string.
        messages = [{"role": "user", "content": [{"type": "text", "text": audit_prompt}]}]
        result = self.model(messages)
        self.validation_cache[cache_key] = result
        return result

    def _run_and_validate(self, agent, question, system_prompt=None):
        """Run *agent* on *question*; retry once if validation criticizes it.

        Shared by both sub-agents (their run() bodies were duplicates).
        On a second failure the critique itself is returned so the caller
        can see why the answer was rejected.
        """
        full_query = f"{system_prompt}\n\nTask: {question}" if system_prompt else question
        answer = agent.run(full_query)
        validation = self.validate_answer(question, answer)
        if "COHERENCE_CHECK_PASSED" in str(validation):
            return answer
        # One corrective attempt.
        answer2 = agent.run(full_query)
        validation2 = self.validate_answer(question, answer2)
        return answer2 if "COHERENCE_CHECK_PASSED" in str(validation2) else validation2

    def __call__(self, question: str) -> str:
        """Route *question* to the appropriate sub-agent and return its answer."""
        agent_type = self.classify_with_llm(question)
        # Compact system prompt shared by both sub-agents.
        system_prompt = (
            "You are a multimodal agent specialized in text, images, videos, and code. "
            "Follow this workflow: 1) Identify the task type. 2) Make a short plan. "
            "3) Only validate the plan if it involves multiple steps or tools. "
            "4) Execute. 5) Before responding, validate the final answer. "
            "If the validator returns CRITIQUE, correct and try again (max 2 attempts)."
        )

        if agent_type == "code":
            return self.code_agent.run(question, system_prompt=system_prompt)
        else:
            return self.general_agent.run(question, system_prompt=system_prompt)

    class CodeSubAgent:
        """Sub-agent for code/math tasks; relies on smolagents base tools only."""

        def __init__(self, model, parent):
            self.model = model
            self.parent = parent
            self.agent = CodeAgent(
                tools=[],
                model=model,
                max_steps=2,
                add_base_tools=True,
            )

        def run(self, question, system_prompt=None):
            """Answer *question*, delegating validation/retry to the parent."""
            return self.parent._run_and_validate(self.agent, question, system_prompt)

    class GeneralSubAgent:
        """Sub-agent for general tasks; equipped with search and multimodal tools."""

        def __init__(self, model, parent):
            self.model = model
            self.parent = parent
            self.agent = CodeAgent(
                tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), CoherenceValidatorTool(), MultimodalAnalysisTool(), FileDownloaderTool()],
                add_base_tools=True,
                max_steps=2,
                model=model,
            )

        def run(self, question, system_prompt=None):
            """Answer *question*, delegating validation/retry to the parent."""
            return self.parent._run_and_validate(self.agent, question, system_prompt)
app.py CHANGED
@@ -3,36 +3,26 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
 
6
 
7
  # (Keep Constants as is)
8
  # --- Constants ---
9
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
10
 
11
- # --- Basic Agent Definition ---
12
- # ----- THIS IS WERE YOU CAN BUILD WHAT YOU WANT ------
13
- class BasicAgent:
14
- def __init__(self):
15
- print("BasicAgent initialized.")
16
- def __call__(self, question: str) -> str:
17
- print(f"Agent received question (first 50 chars): {question[:50]}...")
18
- fixed_answer = "This is a default answer."
19
- print(f"Agent returning fixed answer: {fixed_answer}")
20
- return fixed_answer
21
-
22
  def run_and_submit_all( profile: gr.OAuthProfile | None):
23
  """
24
- Fetches all questions, runs the BasicAgent on them, submits all answers,
25
  and displays the results.
26
  """
27
  # --- Determine HF Space Runtime URL and Repo URL ---
28
- space_id = os.getenv("SPACE_ID") # Get the SPACE_ID for sending link to the code
29
 
30
  if profile:
31
  username= f"{profile.username}"
32
  print(f"User logged in: {username}")
33
  else:
34
  print("User not logged in.")
35
- return "Please Login to Hugging Face with the button.", None
36
 
37
  api_url = DEFAULT_API_URL
38
  questions_url = f"{api_url}/questions"
@@ -40,7 +30,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
40
 
41
  # 1. Instantiate Agent ( modify this part to create your agent)
42
  try:
43
- agent = BasicAgent()
44
  except Exception as e:
45
  print(f"Error instantiating agent: {e}")
46
  return f"Error initializing agent: {e}", None
@@ -96,52 +86,55 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
96
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
97
  print(status_update)
98
 
 
 
99
  # 5. Submit
100
- print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
101
- try:
102
- response = requests.post(submit_url, json=submission_data, timeout=60)
103
- response.raise_for_status()
104
- result_data = response.json()
105
- final_status = (
106
- f"Submission Successful!\n"
107
- f"User: {result_data.get('username')}\n"
108
- f"Overall Score: {result_data.get('score', 'N/A')}% "
109
- f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
110
- f"Message: {result_data.get('message', 'No message received.')}"
111
- )
112
- print("Submission successful.")
113
- results_df = pd.DataFrame(results_log)
114
- return final_status, results_df
115
- except requests.exceptions.HTTPError as e:
116
- error_detail = f"Server responded with status {e.response.status_code}."
117
- try:
118
- error_json = e.response.json()
119
- error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
120
- except requests.exceptions.JSONDecodeError:
121
- error_detail += f" Response: {e.response.text[:500]}"
122
- status_message = f"Submission Failed: {error_detail}"
123
- print(status_message)
124
- results_df = pd.DataFrame(results_log)
125
- return status_message, results_df
126
- except requests.exceptions.Timeout:
127
- status_message = "Submission Failed: The request timed out."
128
- print(status_message)
129
- results_df = pd.DataFrame(results_log)
130
- return status_message, results_df
131
- except requests.exceptions.RequestException as e:
132
- status_message = f"Submission Failed: Network error - {e}"
133
- print(status_message)
134
- results_df = pd.DataFrame(results_log)
135
- return status_message, results_df
136
- except Exception as e:
137
- status_message = f"An unexpected error occurred during submission: {e}"
138
- print(status_message)
139
- results_df = pd.DataFrame(results_log)
140
- return status_message, results_df
141
 
142
 
143
  # --- Build Gradio Interface using Blocks ---
144
  with gr.Blocks() as demo:
 
145
  gr.Markdown("# Basic Agent Evaluation Runner")
146
  gr.Markdown(
147
  """
@@ -173,9 +166,8 @@ with gr.Blocks() as demo:
173
 
174
  if __name__ == "__main__":
175
  print("\n" + "-"*30 + " App Starting " + "-"*30)
176
- # Check for SPACE_HOST and SPACE_ID at startup for information
177
  space_host_startup = os.getenv("SPACE_HOST")
178
- space_id_startup = os.getenv("SPACE_ID") # Get SPACE_ID at startup
179
 
180
  if space_host_startup:
181
  print(f"✅ SPACE_HOST found: {space_host_startup}")
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from FinalAssignmentAgent import FinalAssignmentAgent
7
 
8
  # (Keep Constants as is)
9
  # --- Constants ---
10
  DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
11
 
 
 
 
 
 
 
 
 
 
 
 
12
  def run_and_submit_all( profile: gr.OAuthProfile | None):
13
  """
14
+ Fetches all questions, runs the FinalAssignmentAgent on them, submits all answers,
15
  and displays the results.
16
  """
17
  # --- Determine HF Space Runtime URL and Repo URL ---
18
+ space_id = os.getenv("SPACE_ID")
19
 
20
  if profile:
21
  username= f"{profile.username}"
22
  print(f"User logged in: {username}")
23
  else:
24
  print("User not logged in.")
25
+ # return "Please Login to Hugging Face with the button.", None
26
 
27
  api_url = DEFAULT_API_URL
28
  questions_url = f"{api_url}/questions"
 
30
 
31
  # 1. Instantiate Agent ( modify this part to create your agent)
32
  try:
33
+ agent = FinalAssignmentAgent()
34
  except Exception as e:
35
  print(f"Error instantiating agent: {e}")
36
  return f"Error initializing agent: {e}", None
 
86
  status_update = f"Agent finished. Submitting {len(answers_payload)} answers for user '{username}'..."
87
  print(status_update)
88
 
89
+ print(answers_payload)
90
+
91
  # 5. Submit
92
+ # print(f"Submitting {len(answers_payload)} answers to: {submit_url}")
93
+ # try:
94
+ # response = requests.post(submit_url, json=submission_data, timeout=60)
95
+ # response.raise_for_status()
96
+ # result_data = response.json()
97
+ # final_status = (
98
+ # f"Submission Successful!\n"
99
+ # f"User: {result_data.get('username')}\n"
100
+ # f"Overall Score: {result_data.get('score', 'N/A')}% "
101
+ # f"({result_data.get('correct_count', '?')}/{result_data.get('total_attempted', '?')} correct)\n"
102
+ # f"Message: {result_data.get('message', 'No message received.')}"
103
+ # )
104
+ # print("Submission successful.")
105
+ # results_df = pd.DataFrame(results_log)
106
+ # return final_status, results_df
107
+ # except requests.exceptions.HTTPError as e:
108
+ # error_detail = f"Server responded with status {e.response.status_code}."
109
+ # try:
110
+ # error_json = e.response.json()
111
+ # error_detail += f" Detail: {error_json.get('detail', e.response.text)}"
112
+ # except requests.exceptions.JSONDecodeError:
113
+ # error_detail += f" Response: {e.response.text[:500]}"
114
+ # status_message = f"Submission Failed: {error_detail}"
115
+ # print(status_message)
116
+ # results_df = pd.DataFrame(results_log)
117
+ # return status_message, results_df
118
+ # except requests.exceptions.Timeout:
119
+ # status_message = "Submission Failed: The request timed out."
120
+ # print(status_message)
121
+ # results_df = pd.DataFrame(results_log)
122
+ # return status_message, results_df
123
+ # except requests.exceptions.RequestException as e:
124
+ # status_message = f"Submission Failed: Network error - {e}"
125
+ # print(status_message)
126
+ # results_df = pd.DataFrame(results_log)
127
+ # return status_message, results_df
128
+ # except Exception as e:
129
+ # status_message = f"An unexpected error occurred during submission: {e}"
130
+ # print(status_message)
131
+ # results_df = pd.DataFrame(results_log)
132
+ # return status_message, results_df
133
 
134
 
135
  # --- Build Gradio Interface using Blocks ---
136
  with gr.Blocks() as demo:
137
+
138
  gr.Markdown("# Basic Agent Evaluation Runner")
139
  gr.Markdown(
140
  """
 
166
 
167
  if __name__ == "__main__":
168
  print("\n" + "-"*30 + " App Starting " + "-"*30)
 
169
  space_host_startup = os.getenv("SPACE_HOST")
170
+ space_id_startup = os.getenv("SPACE_ID")
171
 
172
  if space_host_startup:
173
  print(f"✅ SPACE_HOST found: {space_host_startup}")
test.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Manual smoke tests for FinalAssignmentAgent (requires live model access)."""
from FinalAssignmentAgent import FinalAssignmentAgent

# Build the agent under test.
agent = FinalAssignmentAgent()

# Test 1: web search and reasoning.
print("--- Teste de Busca e Lógica ---")
question = "Quem foi Alan Turing e qual a sua principal contribuição?"
print(question)
print(f"Resposta: {agent(question)}")

# Test 2: code and math.
print("\n--- Teste de Código ---")
question = "Calcule a raiz quadrada de 144 e multiplique por 5."
print(question)
print(f"Resposta: {agent(question)}")

# Test 3: task-id file retrieval (simulated).
# Note: only works while the Unit 4 scoring server is up.
print("\n--- Teste de Task ID ---")
print(f"Resposta: {agent('What is in the file for task_id 100?')}")
tools.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool, DuckDuckGoSearchTool
2
+ from huggingface_hub import InferenceClient
3
+ import requests
4
+ import os
5
+ import cv2
6
+
7
class WikipediaSearchTool(Tool):
    """Search tool that biases DuckDuckGo results toward English Wikipedia.

    Intended for factual lookups (names, dates, historical events) where
    Wikipedia pages are the most reliable top hits.
    """
    name = "wikipedia_search"
    description = (
        "Use this tool to find factual information, dates, and descriptions from Wikipedia. "
        "It prioritizes official Wikipedia pages to ensure accuracy for names and historical events."
    )
    inputs = {
        "query": {
            "type": "string",
            "description": "The person, place, event, or object to search for.",
        },
        "date_context": {
            "type": "string",
            "description": "Optional: A specific year or date to narrow down the search (e.g., '1969').",
            "nullable": True
        }
    }
    output_type = "string"

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Compose the generic web-search tool rather than re-implementing it.
        self.search_engine = DuckDuckGoSearchTool()

    def forward(self, query: str, date_context: str = None) -> str:
        """Run the Wikipedia-restricted search and return the raw result text.

        Args:
            query: Subject to look up.
            date_context: Optional year/date, appended as an exact phrase.

        Returns:
            Search results, or an explanatory message when nothing is found
            or the search raises.
        """
        # 1. Build a specialized query.
        # 'site:en.wikipedia.org' keeps the top results on Wikipedia.
        refined_query = f"{query} site:en.wikipedia.org"

        if date_context:
            refined_query += f" \"{date_context}\""

        try:
            # 2. Execute the search.
            results = self.search_engine(refined_query)

            if not results or "no results" in results.lower():
                # Bug fix: the original referenced an undefined name
                # ('target_name'), raising NameError on this path; report
                # the actual query instead.
                return f"No specific Wikipedia entry found for '{query}' with the given context."

            return results
        except Exception as e:
            return f"An error occurred while searching Wikipedia: {str(e)}"
49
+
50
class CoherenceValidatorTool(Tool):
    """smolagents Tool that builds a semantic-audit prompt for an answer.

    NOTE(review): forward() returns the audit *prompt* itself, not a
    verdict — the tool holds no model handle, so presumably the calling
    agent's LLM is expected to evaluate the returned prompt and emit
    'COHERENCE_CHECK_PASSED' or 'CRITIQUE: ...'. Confirm this is intended.
    """
    # Tool metadata consumed by smolagents.
    name = "coherence_validator"
    description = "Checks if a plan or answer is semantically coherent and directly addresses the user request. It validates the relationship between names, ages, logic, and code without re-calculating everything."
    inputs = {
        "original_question": {
            "type": "string",
            "description": "The user's original query."
        },
        "proposed_content": {
            "type": "string",
            "description": "The plan, code snippet, or final answer to be audited."
        }
    }
    output_type = "string"

    def forward(self, original_question: str, proposed_content: str) -> str:
        """Build and return the audit prompt for the question/content pair."""
        audit_prompt = f"""
        Role: Senior Semantic Auditor
        Task: Evaluate if the 'Proposed Content' is a coherent and valid response to the 'Original Question'.

        Original Question: {original_question}
        Proposed Content: {proposed_content}

        Evaluation Criteria:
        1. RELEVANCE: Does the content directly address all parts of the question?
        2. ENTITY CONSISTENCY: Do names, ages, and dates remain consistent throughout the text?
        3. CODE/MATH LOGIC: Does the code or mathematical approach 'make sense' for this specific problem (e.g., not calculating temperature when asked for age)?
        4. ABSURDITY CHECK: Are there any hallucinations or impossible claims?

        Instructions:
        - If coherent, return: "COHERENCE_CHECK_PASSED"
        - If NOT coherent, return: "CRITIQUE: [detailed explanation of what doesn't make sense]"
        """
        return audit_prompt
84
+
85
class FileDownloaderTool(Tool):
    """Download the attachment for a task from the Unit 4 scoring server."""
    name = "file_downloader"
    description = "Downloads task-related files (images or videos) from the evaluation server using a task_id."
    inputs = {
        "task_id": {
            "type": "string",
            "description": "The ID of the task to download the file for."
        }
    }
    output_type = "string"

    def forward(self, task_id: str) -> str:
        """Fetch the file for *task_id* and write it to the working directory.

        Returns:
            The local filename on success, or an error string on failure
            (HTTP status != 200, timeout, network error).
        """
        api_url = "https://agents-course-unit4-scoring.hf.space/files"
        try:
            response = requests.get(f"{api_url}/{task_id}", timeout=15)
            if response.status_code == 200:
                # Pick an extension from the Content-Type header.
                # Fix: PNGs used to be saved as .jpg; default stays .jpg
                # for all other (unknown) types, as before.
                ctype = response.headers.get('Content-Type', '')
                if "video" in ctype:
                    ext = ".mp4"
                elif "png" in ctype:
                    ext = ".png"
                else:
                    ext = ".jpg"
                filename = f"file_{task_id}{ext}"

                with open(filename, "wb") as f:
                    f.write(response.content)
                return filename  # Returns the local path for other tools to use
            return f"Error: Server returned status {response.status_code}"
        except Exception as e:
            return f"Download failed: {str(e)}"
112
+
113
class MultimodalAnalysisTool(Tool):
    """Describe images, or summarize videos by captioning one frame every ~2s."""
    name = "multimodal_analyzer"
    description = "Analyzes images and videos. For images, it provides a description. For videos, it extracts frames every 2 seconds to describe the sequence of events."
    inputs = {
        "file_path": {
            "type": "string",
            "description": "Local path to the image (.jpg, .png) or video (.mp4) file."
        },
        "query": {
            "type": "string",
            "description": "Optional: Specific question about the image or video content.",
            "nullable": True
        }
    }
    output_type = "string"

    # NOTE: a leftover 'classify_with_llm' method that referenced a nonexistent
    # self.model (and duplicated FinalAssignmentAgent's router) was removed —
    # calling it could only raise AttributeError.

    def forward(self, file_path: str, query: str = None) -> str:
        """Caption *file_path* with the hosted BLIP captioning model.

        Args:
            file_path: Path to a local image or video file.
            query: Currently unused; accepted for interface compatibility.

        Returns:
            A textual description, or an error string on failure.
        """
        client = InferenceClient(model="Salesforce/blip-image-captioning-large")

        if not os.path.exists(file_path):
            return f"Error: File {file_path} not found."

        if file_path.lower().endswith(('.mp4', '.avi', '.mov')):
            return self._describe_video(client, file_path)

        try:
            with open(file_path, "rb") as f:
                image_data = f.read()
            description = client.image_to_text(image_data)
            return f"Image Analysis: {description}"
        except Exception as e:
            return f"Error analyzing image: {str(e)}"

    def _describe_video(self, client, file_path: str) -> str:
        """Caption one frame every ~2 seconds and join the captions."""
        descriptions = []
        video = cv2.VideoCapture(file_path)
        try:
            fps = video.get(cv2.CAP_PROP_FPS)
            if fps == 0:
                fps = 24  # Some containers report 0 FPS; assume a common default.
            # Bug fix: clamp to >= 1 so 'count % frame_interval' cannot divide
            # by zero when the reported fps is below 0.5.
            frame_interval = max(1, int(fps * 2))

            count = 0
            while True:
                success, frame = video.read()
                if not success:
                    break

                if count % frame_interval == 0:
                    temp_frame = f"temp_frame_{count}.jpg"
                    cv2.imwrite(temp_frame, frame)
                    try:
                        with open(temp_frame, "rb") as f:
                            # Caption this specific frame.
                            desc = client.image_to_text(f.read())
                        timestamp = int(count // fps)
                        descriptions.append(f"At {timestamp}s: {desc}")
                    finally:
                        # Bug fix: remove the temp frame even if captioning fails.
                        os.remove(temp_frame)
                count += 1
        finally:
            # Bug fix: release the capture handle on all paths.
            video.release()
        return "Video Content Summary: " + " | ".join(descriptions)
+