chevisli committed on
Commit
71ddb0d
·
1 Parent(s): df88b35

Support downloading associated files

Browse files
Files changed (2) hide show
  1. app.py +138 -11
  2. tools.py +61 -6
app.py CHANGED
@@ -1,4 +1,6 @@
1
  import os
 
 
2
  import gradio as gr
3
  import requests
4
  import inspect
@@ -20,18 +22,94 @@ class BasicAgent:
20
  print("BasicAgent initialized with LangChain tools.")
21
  self.agent_executor = agent_executor
22
 
23
- def __call__(self, question: str) -> str:
24
- print(f"Agent received question (first 50 chars): {question[:50]}...")
 
 
 
 
 
 
 
 
 
 
 
 
25
  try:
26
- # Use the LangChain agent executor to process the question
27
- response = self.agent_executor.invoke({"input": question})
28
- answer = response.get('output', 'No answer generated.')
29
- print(f"Agent generated answer: {answer}")
30
- return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  except Exception as e:
32
  error_msg = f"Agent error: {str(e)}"
33
  print(f"Error in agent execution: {e}")
34
  return error_msg
 
 
 
 
 
 
 
 
35
 
36
  def run_and_submit_all( profile: gr.OAuthProfile | None):
37
  """
@@ -51,6 +129,7 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
51
  api_url = DEFAULT_API_URL
52
  questions_url = f"{api_url}/questions"
53
  submit_url = f"{api_url}/submit"
 
54
 
55
  # 1. Instantiate Agent ( modify this part to create your agent)
56
  try:
@@ -90,16 +169,64 @@ def run_and_submit_all( profile: gr.OAuthProfile | None):
90
  for item in questions_data:
91
  task_id = item.get("task_id")
92
  question_text = item.get("question")
 
 
93
  if not task_id or question_text is None:
94
  print(f"Skipping item with missing task_id or question: {item}")
95
  continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  try:
97
- submitted_answer = agent(question_text)
98
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
99
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": submitted_answer})
 
 
 
 
 
 
 
 
 
100
  except Exception as e:
101
- print(f"Error running agent on task {task_id}: {e}")
102
- results_log.append({"Task ID": task_id, "Question": question_text, "Submitted Answer": f"AGENT ERROR: {e}"})
 
 
 
 
 
103
 
104
  if not answers_payload:
105
  print("Agent did not produce any answers to submit.")
 
1
  import os
2
+ import base64
3
+ import mimetypes
4
  import gradio as gr
5
  import requests
6
  import inspect
 
22
  print("BasicAgent initialized with LangChain tools.")
23
  self.agent_executor = agent_executor
24
 
25
+ def __call__(self, question_data: dict) -> str:
26
+ """
27
+ Process question with optional file attachment
28
+ question_data = {
29
+ 'text': str,
30
+ 'file_path': str (optional),
31
+ 'file_name': str (optional)
32
+ }
33
+ """
34
+ question_text = question_data.get('text', '')
35
+ file_path = question_data.get('file_path')
36
+
37
+ print(f"Agent received question (first 50 chars): {question_text[:50]}...")
38
+
39
  try:
40
+ # Prepare input for the agent
41
+ if file_path and os.path.exists(file_path):
42
+ # Handle file input - check if it's an image or other file type
43
+ file_extension = os.path.splitext(file_path)[1].lower()
44
+
45
+ if file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
46
+ # Handle image files - use LLM directly for vision, bypass ReAct agent
47
+ with open(file_path, 'rb') as img_file:
48
+ img_base64 = base64.b64encode(img_file.read()).decode()
49
+
50
+ print(f"Processing question with image: {file_path}")
51
+
52
+ # Use the LLM directly for vision tasks
53
+ from langchain_core.messages import HumanMessage
54
+
55
+ # Create image message for direct LLM call
56
+ message = HumanMessage(
57
+ content=[
58
+ {"type": "text", "text": f"{question_text}\n\nPlease analyze this image and provide your answer in the format: FINAL ANSWER: [YOUR ANSWER]"},
59
+ {
60
+ "type": "image_url",
61
+ "image_url": {"url": f"data:image/{file_extension[1:]};base64,{img_base64}"}
62
+ }
63
+ ]
64
+ )
65
+
66
+ # Call LLM directly for vision tasks
67
+ response = self.agent_executor.llm.invoke([message])
68
+ answer = response.content if hasattr(response, 'content') else str(response)
69
+ print(f"Agent generated answer: {answer}")
70
+ return answer
71
+
72
+ else:
73
+ # Handle other file types (text, CSV, etc.) - use ReAct agent
74
+ try:
75
+ with open(file_path, 'r', encoding='utf-8') as f:
76
+ file_content = f.read()
77
+ combined_input = f"{question_text}\n\nFile content ({os.path.basename(file_path)}):\n{file_content}"
78
+ agent_input = {"input": combined_input}
79
+ print(f"Processing question with text file: {file_path}")
80
+ except UnicodeDecodeError:
81
+ # Binary file - provide file info only
82
+ file_info = f"Binary file: {os.path.basename(file_path)} ({os.path.getsize(file_path)} bytes)"
83
+ combined_input = f"{question_text}\n\nAttached file: {file_info}"
84
+ agent_input = {"input": combined_input}
85
+ print(f"Processing question with binary file: {file_path}")
86
+
87
+ # Use ReAct agent for non-image files
88
+ response = self.agent_executor.invoke(agent_input)
89
+ answer = response.get('output', 'No answer generated.')
90
+ print(f"Agent generated answer: {answer}")
91
+ return answer
92
+ else:
93
+ # Text-only question - use ReAct agent
94
+ agent_input = {"input": question_text}
95
+ print("Processing text-only question")
96
+ response = self.agent_executor.invoke(agent_input)
97
+ answer = response.get('output', 'No answer generated.')
98
+ print(f"Agent generated answer: {answer}")
99
+ return answer
100
+
101
  except Exception as e:
102
  error_msg = f"Agent error: {str(e)}"
103
  print(f"Error in agent execution: {e}")
104
  return error_msg
105
+ finally:
106
+ # Clean up downloaded file
107
+ if file_path and os.path.exists(file_path):
108
+ try:
109
+ os.remove(file_path)
110
+ print(f"Cleaned up file: {file_path}")
111
+ except Exception as e:
112
+ print(f"Warning: Could not remove file {file_path}: {e}")
113
 
114
  def run_and_submit_all( profile: gr.OAuthProfile | None):
115
  """
 
129
  api_url = DEFAULT_API_URL
130
  questions_url = f"{api_url}/questions"
131
  submit_url = f"{api_url}/submit"
132
+ file_url = f"{api_url}/files"
133
 
134
  # 1. Instantiate Agent ( modify this part to create your agent)
135
  try:
 
169
  for item in questions_data:
170
  task_id = item.get("task_id")
171
  question_text = item.get("question")
172
+ file_name = item.get("file_name")
173
+
174
  if not task_id or question_text is None:
175
  print(f"Skipping item with missing task_id or question: {item}")
176
  continue
177
+
178
+ # Prepare question data for the agent
179
+ question_data = {
180
+ 'text': question_text,
181
+ 'file_path': None,
182
+ 'file_name': file_name
183
+ }
184
+
185
+ # Download file if present
186
+ if file_name:
187
+ try:
188
+ print(f"Downloading file for task {task_id}: {file_name}")
189
+ file_response = requests.get(f"{file_url}/{task_id}", timeout=30) # Increased timeout
190
+ file_response.raise_for_status()
191
+
192
+ # Save file with a safe path
193
+ safe_file_path = os.path.join(os.getcwd(), f"temp_{task_id}_{file_name}")
194
+ with open(safe_file_path, "wb") as f:
195
+ f.write(file_response.content)
196
+
197
+ question_data['file_path'] = safe_file_path
198
+ print(f"File downloaded successfully: {safe_file_path}")
199
+
200
+ except requests.exceptions.Timeout:
201
+ print(f"Timeout downloading file for task {task_id}: {file_name}")
202
+ question_data['file_path'] = None
203
+ except Exception as e:
204
+ print(f"Error downloading file for task {task_id}: {e}")
205
+ # Continue processing without file
206
+ question_data['file_path'] = None
207
+
208
+ # Process question with agent
209
  try:
210
+ submitted_answer = agent(question_data)
211
  answers_payload.append({"task_id": task_id, "submitted_answer": submitted_answer})
212
+
213
+ # Log result with file info
214
+ file_info = f" (with file: {file_name})" if file_name else ""
215
+ results_log.append({
216
+ "Task ID": task_id,
217
+ "Question": question_text + file_info,
218
+ "Submitted Answer": submitted_answer
219
+ })
220
+ print(f"Task {task_id} completed successfully{file_info}")
221
+
222
  except Exception as e:
223
+ print(f"Error running agent on task {task_id}: {e}")
224
+ file_info = f" (with file: {file_name})" if file_name else ""
225
+ results_log.append({
226
+ "Task ID": task_id,
227
+ "Question": question_text + file_info,
228
+ "Submitted Answer": f"AGENT ERROR: {e}"
229
+ })
230
 
231
  if not answers_payload:
232
  print("Agent did not produce any answers to submit.")
tools.py CHANGED
@@ -3,16 +3,61 @@ from langchain.agents import AgentExecutor, create_react_agent
3
  from langchain_google_community import GoogleSearchRun, GoogleSearchAPIWrapper
4
  from langchain_core.prompts import PromptTemplate
5
  from langchain_openai import ChatOpenAI # Or any other LangChain compatible LLM
 
6
  from dotenv import load_dotenv
 
 
7
 
8
  load_dotenv()
9
 
10
- # 1. Initialize the Tool with API wrapper
11
- # Create the Google Search API wrapper
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  search_wrapper = GoogleSearchAPIWrapper()
13
  search_tool = GoogleSearchRun(api_wrapper=search_wrapper)
14
 
15
- tools = [search_tool]
 
 
 
 
 
 
 
16
 
17
  # 2. Create a simple prompt template for an agent
18
  template = """
@@ -43,12 +88,22 @@ prompt = PromptTemplate.from_template(template)
43
 
44
  # 3. Set up the LLM and Agent
45
  llm = ChatOpenAI(
46
- model="gpt-4o",
47
- temperature=0
 
 
 
48
  )
49
 
50
  agent = create_react_agent(llm, tools, prompt)
51
- agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
 
 
 
 
 
 
 
52
 
53
  # 4. Run the agent with a question
54
  # response = agent_executor.invoke({
 
3
  from langchain_google_community import GoogleSearchRun, GoogleSearchAPIWrapper
4
  from langchain_core.prompts import PromptTemplate
5
  from langchain_openai import ChatOpenAI # Or any other LangChain compatible LLM
6
+ from langchain.tools import Tool
7
  from dotenv import load_dotenv
8
+ import pandas as pd
9
+ import json
10
 
11
  load_dotenv()
12
 
13
def analyze_file_content(file_path: str) -> str:
    """Summarize a local file for the agent.

    Returns a short human-readable description: CSV shape plus head, JSON
    top-level keys, a text preview, or size/type info for images and unknown
    binaries. Never raises — every failure is reported as a string so the
    ReAct loop can keep reasoning.

    Args:
        file_path: path of the file to inspect.
    """
    # BUG FIX: this module does not import `os` at the top level, so the
    # previous version raised NameError on first use. Import locally to keep
    # the fix self-contained.
    import os

    if not os.path.exists(file_path):
        return f"File not found: {file_path}"

    try:
        file_size = os.path.getsize(file_path)
        file_extension = os.path.splitext(file_path)[1].lower()

        # Dispatch on extension; content-based sniffing is out of scope here.
        if file_extension == '.csv':
            df = pd.read_csv(file_path)
            return f"CSV file with {len(df)} rows and {len(df.columns)} columns. Columns: {list(df.columns)[:10]}. First few rows:\n{df.head().to_string()}"

        elif file_extension == '.json':
            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return f"JSON file. Keys: {list(data.keys()) if isinstance(data, dict) else 'Array with ' + str(len(data)) + ' items'}"

        elif file_extension in ['.txt', '.md', '.py', '.js', '.html', '.css']:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            # Preview only the first 500 characters to keep prompts small.
            return f"Text file ({file_extension}) with {len(content)} characters. Content preview:\n{content[:500]}..."

        elif file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
            return f"Image file ({file_extension}) - {file_size} bytes. Use vision capabilities to analyze this image."

        else:
            return f"File: {file_path} ({file_extension}) - {file_size} bytes. Binary or unknown format."

    except Exception as e:
        # Best-effort tool: report the problem instead of crashing the agent.
        return f"Error analyzing file {file_path}: {str(e)}"
47
+
48
# 1. Tool definitions handed to the ReAct agent.

# Wraps analyze_file_content so the agent can inspect downloaded attachments.
file_analysis_tool = Tool(
    func=analyze_file_content,
    name="file_analyzer",
    description="Analyze the content of files including CSV, JSON, text files, and images. Input should be a file path.",
)

# Google web search; credentials are read from the environment (load_dotenv above).
search_wrapper = GoogleSearchAPIWrapper()
search_tool = GoogleSearchRun(api_wrapper=search_wrapper)

# Complete tool set for the agent executor defined below.
tools = [search_tool, file_analysis_tool]
61
 
62
  # 2. Create a simple prompt template for an agent
63
  template = """
 
88
 
89
# 3. Set up the LLM and Agent
llm = ChatOpenAI(
    model="gpt-4o",  # vision-capable model; images are sent to it directly
    temperature=0,   # deterministic answers for benchmark scoring
    # BUG FIX: the previous config passed both timeout=60 and
    # request_timeout=120, but `request_timeout` is only the deprecated alias
    # of `timeout` — setting both is contradictory. Keep a single value.
    timeout=60,
    max_retries=2,   # retry transient API failures
)

agent = create_react_agent(llm, tools, prompt)

agent_executor = AgentExecutor(
    agent=agent,
    tools=tools,
    verbose=True,
    max_execution_time=60,  # wall-clock cap for one full agent run
    max_iterations=10,      # prevent infinite thought/action loops
    # BUG FIX: early_stopping_method="generate" is not supported for agents
    # built with create_react_agent (AgentExecutor raises ValueError when the
    # iteration limit is hit); "force" returns a canned stopped response
    # instead of crashing.
    early_stopping_method="force",
)
107
 
108
  # 4. Run the agent with a question
109
  # response = agent_executor.invoke({