Final_Assignment_Template

Sleeping

App Files Community

Stardust00 committed on Jun 12, 2025

Commit

0335261

1 Parent(s): 71ddb0d

setup tools

Browse files

Files changed (10) hide show

pyproject.toml +2 -0
tools.py +0 -114
utils/agent_executor.py +39 -0
utils/audio_parser_tool.py +0 -0
utils/document_parser_tool.py +236 -0
utils/file_downloader.py +327 -0
utils/prompt.py +26 -0
utils/search_tool.py +20 -0
utils/tools.py +4 -0
uv.lock +37 -0

pyproject.toml CHANGED Viewed

@@ -8,12 +8,14 @@ dependencies = [
     "gradio[oauth]>=4.0.0",
     "requests>=2.25.0",
     "pandas>=1.3.0",
     "python-dotenv>=1.0.0",
     "langchain>=0.1.0",
     "langchain-community>=0.0.20",
     "langchain-core>=0.1.0",
     "langchain-openai>=0.0.5",
     "langchain-google-community>=1.0.0",
 ]
 [project.optional-dependencies]

     "gradio[oauth]>=4.0.0",
     "requests>=2.25.0",
     "pandas>=1.3.0",
+    "pypdf>=5.6.0",
     "python-dotenv>=1.0.0",
     "langchain>=0.1.0",
     "langchain-community>=0.0.20",
     "langchain-core>=0.1.0",
     "langchain-openai>=0.0.5",
     "langchain-google-community>=1.0.0",
+    "openpyxl>=3.0.0",
 ]
 [project.optional-dependencies]

tools.py DELETED Viewed

@@ -1,114 +0,0 @@
-import os
-from langchain.agents import AgentExecutor, create_react_agent
-from langchain_google_community import GoogleSearchRun, GoogleSearchAPIWrapper
-from langchain_core.prompts import PromptTemplate
-from langchain_openai import ChatOpenAI # Or any other LangChain compatible LLM
-from langchain.tools import Tool
-from dotenv import load_dotenv
-import pandas as pd
-import json
-load_dotenv()
-def analyze_file_content(file_path: str) -> str:
-    """
-    Analyze file content and provide information about the file.
-    """
-    if not os.path.exists(file_path):
-        return f"File not found: {file_path}"
-    try:
-        file_size = os.path.getsize(file_path)
-        file_extension = os.path.splitext(file_path)[1].lower()
-        # Handle different file types
-        if file_extension == '.csv':
-            df = pd.read_csv(file_path)
-            return f"CSV file with {len(df)} rows and {len(df.columns)} columns. Columns: {list(df.columns)[:10]}. First few rows:\n{df.head().to_string()}"
-        elif file_extension == '.json':
-            with open(file_path, 'r', encoding='utf-8') as f:
-                data = json.load(f)
-            return f"JSON file. Keys: {list(data.keys()) if isinstance(data, dict) else 'Array with ' + str(len(data)) + ' items'}"
-        elif file_extension in ['.txt', '.md', '.py', '.js', '.html', '.css']:
-            with open(file_path, 'r', encoding='utf-8') as f:
-                content = f.read()
-            return f"Text file ({file_extension}) with {len(content)} characters. Content preview:\n{content[:500]}..."
-        elif file_extension in ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp']:
-            return f"Image file ({file_extension}) - {file_size} bytes. Use vision capabilities to analyze this image."
-        else:
-            return f"File: {file_path} ({file_extension}) - {file_size} bytes. Binary or unknown format."
-    except Exception as e:
-        return f"Error analyzing file {file_path}: {str(e)}"
-# 1. Initialize the Tools
-# Google Search Tool
-search_wrapper = GoogleSearchAPIWrapper()
-search_tool = GoogleSearchRun(api_wrapper=search_wrapper)
-# File Analysis Tool
-file_analysis_tool = Tool(
-    name="file_analyzer",
-    description="Analyze the content of files including CSV, JSON, text files, and images. Input should be a file path.",
-    func=analyze_file_content
-)
-tools = [search_tool, file_analysis_tool]
-# 2. Create a simple prompt template for an agent
-template = """
-You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
-Answer the following questions as best you can. You have access to the following tools:
-{tools}
-Use the following format:
-Question: the input question you must answer
-Thought: you should always think about what to do
-Action: the action to take, should be one of [{tool_names}]
-Action Input: the input to the action
-Observation: the result of the action
-... (this Thought/Action/Action Input/Observation can repeat N times)
-Thought: I now know the final answer
-Final Answer: the final answer to the original input question
-Begin!
-Question: {input}
-{agent_scratchpad}
-"""
-prompt = PromptTemplate.from_template(template)
-# 3. Set up the LLM and Agent
-llm = ChatOpenAI(
-    model="gpt-4o",  # Vision-capable model
-    temperature=0,
-    timeout=60,  # 60 second timeout for LLM calls
-    request_timeout=120,  # 2 minute timeout for requests
-    max_retries=2,  # Retry failed requests
-)
-agent = create_react_agent(llm, tools, prompt)
-agent_executor = AgentExecutor(
-    agent=agent,
-    tools=tools,
-    verbose=True,
-    max_execution_time=60,  # 1 minute timeout for entire agent execution
-    max_iterations=10,  # Limit agent iterations to prevent infinite loops
-    early_stopping_method="generate"  # Stop early if final answer is generated
-)
-# 4. Run the agent with a question
-# response = agent_executor.invoke({
-    # "input": "What is the current capital of Australia and when was it founded?"
-# })
-# print("\nFinal Answer:")
-# print(response['output'])

utils/agent_executor.py ADDED Viewed

	@@ -0,0 +1,39 @@

+from langchain.agents import AgentExecutor, create_react_agent
+from langchain_openai import ChatOpenAI
+from prompt import prompt_default
+def create_agent_executor(
+    llm=None,
+    tools=None,
+    prompt=None,
+    verbose=True,
+    max_execution_time=60,
+    max_iterations=10,
+    early_stopping_method="generate",
+):
+    if llm is None:
+        llm = ChatOpenAI(
+            model="gpt-4o",  # Vision-capable model
+            temperature=0,
+            timeout=60,  # 60 second timeout for LLM calls
+            request_timeout=120,  # 2 minute timeout for requests
+            max_retries=2,  # Retry failed requests
+        )
+    if tools is None:
+        tools = []
+    if prompt is None:
+        prompt = prompt_default
+    agent = create_react_agent(llm, tools, prompt)
+    agent_executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        verbose=verbose,
+        max_execution_time=max_execution_time,
+        max_iterations=max_iterations,
+        early_stopping_method=early_stopping_method,
+    )
+    return agent_executor

utils/audio_parser_tool.py ADDED Viewed

File without changes

utils/document_parser_tool.py ADDED Viewed

	@@ -0,0 +1,236 @@

+import os
+import pandas as pd
+from langchain_community.document_loaders import PyPDFLoader
+from langchain.tools import Tool
+from agent_executor import create_agent_executor
+from file_downloader import FileDownloader
+from dotenv import load_dotenv
+load_dotenv()
+class DocumentParserTool:
+    """A tool for parsing PDF and XLSX documents."""
+    def __init__(self):
+        """Initialize the DocumentParserTool with FileDownloader."""
+        self.downloader = FileDownloader()
+    def parse_document_from_url_or_path(self, path_or_url: str) -> str:
+        """
+        Parse a document from URL or file path. Downloads if URL, uses directly if path.
+        Args:
+            path_or_url (str): URL to download from or file path to use
+        Returns:
+            str: Parsed content of the document
+        """
+        try:
+            # Get file path (download if URL, verify if file path)
+            file_path = self.downloader.get_file_path(path_or_url)
+            # Parse the document
+            result = self.parse_document(file_path)
+            # Add context about the source
+            source_info = f"Source: {'Downloaded from ' + path_or_url if self.downloader.is_url(path_or_url) else 'File at ' + path_or_url}\n"
+            source_info += f"Local file path: {file_path}\n\n"
+            return source_info + result
+        except Exception as e:
+            return f"Error processing {path_or_url}: {str(e)}"
+    def parse_document(self, document_path: str) -> str:
+        """
+        Parse a document from the given file path.
+        Args:
+            document_path (str): Path to the document file
+        Returns:
+            str: Parsed content of the document
+        """
+        if not os.path.exists(document_path):
+            return f"Error: File not found at path: {document_path}"
+        try:
+            file_extension = os.path.splitext(document_path)[1].lower()
+            if file_extension == ".pdf":
+                return self._parse_pdf(document_path)
+            elif file_extension in [".xlsx", ".xls"]:
+                return self._parse_excel(document_path)
+            else:
+                return f"Error: Unsupported file format '{file_extension}'. Supported formats: PDF (.pdf), Excel (.xlsx, .xls)"
+        except Exception as e:
+            return f"Error parsing document: {str(e)}"
+    def _parse_pdf(self, document_path: str) -> str:
+        """Parse PDF document and extract text content."""
+        try:
+            loader = PyPDFLoader(document_path)
+            pages = loader.load_and_split()
+            pdf_text = " ".join(page.page_content for page in pages)
+            if not pdf_text.strip():
+                return (
+                    "Warning: PDF appears to be empty or contains no extractable text."
+                )
+            return (
+                f"PDF Content (from {os.path.basename(document_path)}):\n\n{pdf_text}"
+            )
+        except Exception as e:
+            return f"Error parsing PDF: {str(e)}"
+    def _parse_excel(self, document_path: str) -> str:
+        """Parse Excel document and extract structured data."""
+        try:
+            # Read all sheets from the Excel file
+            excel_file = pd.ExcelFile(document_path)
+            sheet_names = excel_file.sheet_names
+            if not sheet_names:
+                return "Warning: Excel file contains no sheets."
+            parsed_content = (
+                f"Excel Content (from {os.path.basename(document_path)}):\n\n"
+            )
+            parsed_content += f"Number of sheets: {len(sheet_names)}\n"
+            parsed_content += f"Sheet names: {', '.join(sheet_names)}\n\n"
+            for sheet_name in sheet_names:
+                try:
+                    df = pd.read_excel(document_path, sheet_name=sheet_name)
+                    parsed_content += f"--- Sheet: {sheet_name} ---\n"
+                    parsed_content += (
+                        f"Dimensions: {df.shape[0]} rows × {df.shape[1]} columns\n"
+                    )
+                    if df.empty:
+                        parsed_content += "Sheet is empty.\n\n"
+                        continue
+                    parsed_content += (
+                        f"Columns: {', '.join(df.columns.astype(str))}\n\n"
+                    )
+                    # Include first few rows as sample data
+                    sample_rows = min(5, len(df))
+                    parsed_content += f"Sample data (first {sample_rows} rows):\n"
+                    parsed_content += df.head(sample_rows).to_string(index=False)
+                    parsed_content += "\n\n"
+                    # Include summary statistics for numeric columns
+                    numeric_cols = df.select_dtypes(include=["number"]).columns
+                    if not numeric_cols.empty:
+                        parsed_content += "Summary statistics for numeric columns:\n"
+                        parsed_content += df[numeric_cols].describe().to_string()
+                        parsed_content += "\n\n"
+                except Exception as sheet_error:
+                    parsed_content += (
+                        f"Error reading sheet '{sheet_name}': {str(sheet_error)}\n\n"
+                    )
+            return parsed_content
+        except Exception as e:
+            return f"Error parsing Excel file: {str(e)}"
+# Create the DocumentParserTool instance
+document_parser_tool_instance = DocumentParserTool()
+# Create a LangChain Tool wrapper for the document parser (file paths only)
+document_parser_tool = Tool(
+    name="document_parser",
+    description=(
+        "Parse PDF and Excel (.xlsx, .xls) documents to extract their content. "
+        "For PDFs, extracts all text content. For Excel files, provides structured data "
+        "including sheet names, dimensions, column headers, sample data, and summary statistics. "
+        "Input should be a file path to the document."
+    ),
+    func=document_parser_tool_instance.parse_document,
+)
+# Create a LangChain Tool wrapper for the document parser with URL/path support
+document_parser_url_tool = Tool(
+    name="document_parser_url",
+    description=(
+        "Parse PDF and Excel (.xlsx, .xls) documents from URLs or file paths. "
+        "If URL is provided, downloads the file first. If file path is provided, uses it directly. "
+        "For PDFs, extracts all text content. For Excel files, provides structured data "
+        "including sheet names, dimensions, column headers, sample data, and summary statistics. "
+        "Input can be either a URL (http/https) or a local file path."
+    ),
+    func=document_parser_tool_instance.parse_document_from_url_or_path,
+)
+if __name__ == "__main__":
+    print("Start testing document parser tool with file downloader integration")
+    # Initialize file downloader
+    downloader = FileDownloader()
+    # Test with both URLs and file paths
+    test_files = [
+        "https://arxiv.org/pdf/2501.00147",  # URL - should be downloaded
+        # "https://agents-course-unit4-scoring.hf.space/files/7bd855d8-463d-4ed5-93ca-5fe35145f733",  # URL - should be downloaded
+        # "./test_document.pdf",  # File path - should be used directly (if exists)
+    ]
+    downloaded_files = []  # Keep track of downloaded files for cleanup
+    for test_input in test_files:
+        print(f"\n--- Processing: {test_input} ---")
+        try:
+            # Get file path (download if URL, verify if file path)
+            file_path = downloader.get_file_path(test_input)
+            print(f"Using file path: {file_path}")
+            # Track downloaded files for cleanup
+            if downloader.is_url(test_input):
+                downloaded_files.append(file_path)
+            # Test document parser with the file
+            result = document_parser_tool_instance.parse_document(file_path)
+            print(
+                f"Parse result preview: {result[:500] + '...' if len(result) > 500 else result}"
+            )
+            # Test with agent executor using the URL-capable tool
+            tools = [document_parser_url_tool]
+            agent_executor = create_agent_executor(tools=tools)
+            # Create a comprehensive prompt that includes the original input
+            prompt_with_input = f"""Please analyze the document from this source: {test_input}
+            Use the document_parser_url tool to download (if URL) and analyze the content.
+            Provide a comprehensive summary of what you find in the document.
+            The tool will handle both URLs (by downloading) and file paths (by using directly)."""
+            print(f"\n--- Testing with Agent Executor (URL-capable tool) ---")
+            response = agent_executor.invoke({"input": prompt_with_input})
+            print("Agent Response:")
+            print(response["output"])
+        except Exception as e:
+            print(f"Error processing {test_input}: {str(e)}")
+    # Cleanup downloaded files
+    print(f"\n--- Cleanup ---")
+    for file_path in downloaded_files:
+        try:
+            downloader.delete_file(file_path)
+        except Exception as e:
+            print(f"Warning: Could not delete {file_path}: {e}")
+    print(f"Final downloader state: {repr(downloader)}")

utils/file_downloader.py ADDED Viewed

	@@ -0,0 +1,327 @@

+import os
+import requests
+import shutil
+import tempfile
+import uuid
+from pathlib import Path
+from typing import Optional, List
+from urllib.parse import urlparse
+class FileDownloader:
+    """
+    A class for downloading files from URLs and managing them in a temporary directory.
+    Provides functionality to:
+    1. Download files from URLs and save to tmp directory
+    2. Delete specific files from tmp directory
+    3. Clear all files from tmp directory
+    """
+    def __init__(self, tmp_dir_name: str = "tmp"):
+        """
+        Initialize the FileDownloader.
+        Args:
+            tmp_dir_name (str): Name of the temporary directory to use
+        """
+        self.tmp_dir_name = tmp_dir_name
+        self.tmp_dir_path = Path(tmp_dir_name)
+        self._ensure_tmp_directory()
+    def _ensure_tmp_directory(self) -> None:
+        """Ensure the temporary directory exists."""
+        self.tmp_dir_path.mkdir(exist_ok=True)
+    def _get_filename_from_url(self, url: str) -> str:
+        """
+        Extract filename from URL, with fallback to generated name.
+        Args:
+            url (str): The URL to extract filename from
+        Returns:
+            str: The filename
+        """
+        parsed_url = urlparse(url)
+        filename = os.path.basename(parsed_url.path)
+        # If no filename found in URL, generate one
+        if not filename or '.' not in filename:
+            # Try to get extension from content-type later, for now use generic
+            filename = f"downloaded_file_{uuid.uuid4().hex[:8]}"
+        return filename
+    def _get_unique_filename(self, filename: str) -> str:
+        """
+        Ensure filename is unique in the tmp directory.
+        Args:
+            filename (str): Original filename
+        Returns:
+            str: Unique filename
+        """
+        base_path = self.tmp_dir_path / filename
+        if not base_path.exists():
+            return filename
+        # Split filename into name and extension
+        name_part = base_path.stem
+        ext_part = base_path.suffix
+        counter = 1
+        while True:
+            new_filename = f"{name_part}_{counter}{ext_part}"
+            new_path = self.tmp_dir_path / new_filename
+            if not new_path.exists():
+                return new_filename
+            counter += 1
+    def download(self, url: str, filename: Optional[str] = None,
+                 timeout: int = 30, chunk_size: int = 8192) -> str:
+        """
+        Download a file from URL and save to tmp directory.
+        Args:
+            url (str): URL to download from
+            filename (str, optional): Custom filename. If None, extract from URL
+            timeout (int): Request timeout in seconds
+            chunk_size (int): Size of chunks for streaming download
+        Returns:
+            str: Full path to the downloaded file
+        Raises:
+            requests.RequestException: If download fails
+            IOError: If file writing fails
+        """
+        try:
+            # Start the download
+            response = requests.get(url, stream=True, timeout=timeout)
+            response.raise_for_status()
+            # Determine filename
+            if filename is None:
+                filename = self._get_filename_from_url(url)
+                # Try to get better filename from Content-Disposition header
+                content_disposition = response.headers.get('content-disposition')
+                if content_disposition and 'filename=' in content_disposition:
+                    try:
+                        # Extract filename from Content-Disposition header
+                        import re
+                        filename_match = re.search(r'filename[*]?=([^;]+)', content_disposition)
+                        if filename_match:
+                            header_filename = filename_match.group(1).strip('"\'')
+                            if header_filename:
+                                filename = header_filename
+                    except Exception:
+                        # If header parsing fails, keep the original filename
+                        pass
+                # If still no extension, try to infer from content-type
+                if '.' not in filename:
+                    content_type = response.headers.get('content-type', '').lower()
+                    if 'pdf' in content_type:
+                        filename += '.pdf'
+                    elif 'image/jpeg' in content_type or 'image/jpg' in content_type:
+                        filename += '.jpg'
+                    elif 'image/png' in content_type:
+                        filename += '.png'
+                    elif 'text/plain' in content_type:
+                        filename += '.txt'
+                    elif 'application/json' in content_type:
+                        filename += '.json'
+                    elif 'text/html' in content_type:
+                        filename += '.html'
+            # Ensure unique filename
+            filename = self._get_unique_filename(filename)
+            file_path = self.tmp_dir_path / filename
+            # Download and save file in chunks
+            with open(file_path, 'wb') as f:
+                for chunk in response.iter_content(chunk_size=chunk_size):
+                    if chunk:  # Filter out keep-alive chunks
+                        f.write(chunk)
+            print(f"Successfully downloaded: {url} -> {file_path}")
+            return str(file_path)
+        except requests.exceptions.RequestException as e:
+            raise requests.RequestException(f"Failed to download {url}: {str(e)}")
+        except IOError as e:
+            raise IOError(f"Failed to save file {filename}: {str(e)}")
+    def delete_file(self, file_path: str) -> bool:
+        """
+        Delete a specific file from the tmp directory.
+        Args:
+            file_path (str): Path to the file to delete (can be full path or just filename)
+        Returns:
+            bool: True if file was deleted, False if file didn't exist
+        Raises:
+            ValueError: If file is not in the tmp directory
+            OSError: If deletion fails
+        """
+        # Convert to Path object
+        path = Path(file_path)
+        # If it's just a filename, assume it's in tmp directory
+        if not path.is_absolute() and len(path.parts) == 1:
+            path = self.tmp_dir_path / path
+        # Ensure the file is within our tmp directory for security
+        try:
+            resolved_path = path.resolve()
+            tmp_resolved = self.tmp_dir_path.resolve()
+            if not str(resolved_path).startswith(str(tmp_resolved)):
+                raise ValueError(f"File {file_path} is not in the tmp directory {self.tmp_dir_path}")
+        except (OSError, ValueError) as e:
+            raise ValueError(f"Invalid file path {file_path}: {str(e)}")
+        # Delete the file
+        if path.exists():
+            try:
+                path.unlink()
+                print(f"Successfully deleted: {path}")
+                return True
+            except OSError as e:
+                raise OSError(f"Failed to delete {path}: {str(e)}")
+        else:
+            print(f"File not found: {path}")
+            return False
+    def clear_tmp_directory(self) -> int:
+        """
+        Clear all files from the tmp directory.
+        Returns:
+            int: Number of files deleted
+        Raises:
+            OSError: If clearing fails
+        """
+        if not self.tmp_dir_path.exists():
+            print(f"Tmp directory {self.tmp_dir_path} does not exist")
+            return 0
+        deleted_count = 0
+        errors = []
+        try:
+            for item in self.tmp_dir_path.iterdir():
+                try:
+                    if item.is_file():
+                        item.unlink()
+                        deleted_count += 1
+                        print(f"Deleted file: {item}")
+                    elif item.is_dir():
+                        shutil.rmtree(item)
+                        deleted_count += 1
+                        print(f"Deleted directory: {item}")
+                except OSError as e:
+                    errors.append(f"Failed to delete {item}: {str(e)}")
+        except OSError as e:
+            raise OSError(f"Failed to access tmp directory: {str(e)}")
+        if errors:
+            error_msg = "; ".join(errors)
+            raise OSError(f"Some files could not be deleted: {error_msg}")
+        print(f"Successfully cleared tmp directory. Deleted {deleted_count} items.")
+        return deleted_count
+    def list_files(self) -> List[str]:
+        """
+        List all files in the tmp directory.
+        Returns:
+            List[str]: List of file paths in the tmp directory
+        """
+        if not self.tmp_dir_path.exists():
+            return []
+        files = []
+        try:
+            for item in self.tmp_dir_path.iterdir():
+                if item.is_file():
+                    files.append(str(item))
+        except OSError:
+            # If we can't read the directory, return empty list
+            pass
+        return files
+    def get_tmp_dir_size(self) -> int:
+        """
+        Get the total size of all files in the tmp directory.
+        Returns:
+            int: Total size in bytes
+        """
+        if not self.tmp_dir_path.exists():
+            return 0
+        total_size = 0
+        try:
+            for item in self.tmp_dir_path.rglob('*'):
+                if item.is_file():
+                    total_size += item.stat().st_size
+        except OSError:
+            # If we can't access some files, return partial size
+            pass
+        return total_size
+    def is_url(self, path_or_url: str) -> bool:
+        """
+        Check if the given string is a URL or a file path.
+        Args:
+            path_or_url (str): String to check
+        Returns:
+            bool: True if it's a URL, False if it's a file path
+        """
+        return path_or_url.startswith(('http://', 'https://'))
+    def get_file_path(self, path_or_url: str, filename: Optional[str] = None) -> str:
+        """
+        Get file path - download if URL, return as-is if file path.
+        Args:
+            path_or_url (str): URL to download or file path to use
+            filename (str, optional): Custom filename for downloads
+        Returns:
+            str: File path to use
+        Raises:
+            FileNotFoundError: If file path doesn't exist
+            requests.RequestException: If URL download fails
+        """
+        if self.is_url(path_or_url):
+            # It's a URL, download it
+            return self.download(path_or_url, filename)
+        else:
+            # It's a file path, verify it exists
+            if not os.path.exists(path_or_url):
+                raise FileNotFoundError(f"File not found: {path_or_url}")
+            return path_or_url
+    def __str__(self) -> str:
+        """String representation of the FileDownloader."""
+        return f"FileDownloader(tmp_dir='{self.tmp_dir_path}')"
+    def __repr__(self) -> str:
+        """Detailed string representation of the FileDownloader."""
+        file_count = len(self.list_files())
+        size = self.get_tmp_dir_size()
+        return f"FileDownloader(tmp_dir='{self.tmp_dir_path}', files={file_count}, size={size} bytes)"

utils/prompt.py ADDED Viewed

	@@ -0,0 +1,26 @@

+from langchain_core.prompts import PromptTemplate
+template_default = """
+You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+Answer the following questions as best you can. You have access to the following tools:
+{tools}
+Use the following format:
+Question: the input question you must answer
+Thought: you should always think about what to do
+Action: the action to take, should be one of [{tool_names}]
+Action Input: the input to the action
+Observation: the result of the action
+... (this Thought/Action/Action Input/Observation can repeat N times)
+Thought: I now know the final answer
+Final Answer: the final answer to the original input question
+Begin!
+Question: {input}
+{agent_scratchpad}
+"""
+prompt_default = PromptTemplate.from_template(template_default)

utils/search_tool.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from langchain_google_community import GoogleSearchRun, GoogleSearchAPIWrapper
+from dotenv import load_dotenv
+from agent_executor import create_agent_executor
+load_dotenv()
+search_wrapper = GoogleSearchAPIWrapper()
+search_tool = GoogleSearchRun(api_wrapper=search_wrapper)
+if __name__ == "__main__":
+    print("Start testing search tool with an example question")
+    tools = [search_tool]
+    agent_executor = create_agent_executor(tools=tools)
+    response = agent_executor.invoke(
+        {"input": "What is the current capital of Australia and when was it founded?"}
+    )
+    print("\nFinal Answer:")
+    print(response["output"])

utils/tools.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from search_tool import search_tool
+from document_parser_tool import document_parser_tool
+tools = [search_tool, document_parser_tool]

uv.lock CHANGED Viewed

@@ -20,7 +20,9 @@ dependencies = [
     { name = "langchain-core" },
     { name = "langchain-google-community" },
     { name = "langchain-openai" },
     { name = "pandas" },
     { name = "python-dotenv" },
     { name = "requests" },
 ]
@@ -49,7 +51,9 @@ requires-dist = [
     { name = "langchain-core", specifier = ">=0.1.0" },
     { name = "langchain-google-community", specifier = ">=1.0.0" },
     { name = "langchain-openai", specifier = ">=0.0.5" },
     { name = "pandas", specifier = ">=1.3.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
     { name = "python-dotenv", specifier = ">=1.0.0" },
     { name = "requests", specifier = ">=2.25.0" },
@@ -533,6 +537,15 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
 ]
 [[package]]
 name = "exceptiongroup"
 version = "1.3.0"
@@ -1650,6 +1663,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/58/c1/dfb16b3432810fc9758564f9d1a4dbce6b93b7fb763ba57530c7fc48316d/openai-1.86.0-py3-none-any.whl", hash = "sha256:c8889c39410621fe955c230cc4c21bfe36ec887f4e60a957de05f507d7e1f349", size = 730296, upload-time = "2025-06-10T16:50:30.495Z" },
 ]
 [[package]]
 name = "orjson"
 version = "3.10.18"
@@ -2184,6 +2209,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" },
 ]
 [[package]]
 name = "pytest"
 version = "8.4.0"

     { name = "langchain-core" },
     { name = "langchain-google-community" },
     { name = "langchain-openai" },
+    { name = "openpyxl" },
     { name = "pandas" },
+    { name = "pypdf" },
     { name = "python-dotenv" },
     { name = "requests" },
 ]
     { name = "langchain-core", specifier = ">=0.1.0" },
     { name = "langchain-google-community", specifier = ">=1.0.0" },
     { name = "langchain-openai", specifier = ">=0.0.5" },
+    { name = "openpyxl", specifier = ">=3.0.0" },
     { name = "pandas", specifier = ">=1.3.0" },
+    { name = "pypdf", specifier = ">=5.6.0" },
     { name = "pytest", marker = "extra == 'dev'", specifier = ">=7.0" },
     { name = "python-dotenv", specifier = ">=1.0.0" },
     { name = "requests", specifier = ">=2.25.0" },
     { url = "https://files.pythonhosted.org/packages/12/b3/231ffd4ab1fc9d679809f356cebee130ac7daa00d6d6f3206dd4fd137e9e/distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2", size = 20277, upload-time = "2023-12-24T09:54:30.421Z" },
 ]
+[[package]]
+name = "et-xmlfile"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d3/38/af70d7ab1ae9d4da450eeec1fa3918940a5fafb9055e934af8d6eb0c2313/et_xmlfile-2.0.0.tar.gz", hash = "sha256:dab3f4764309081ce75662649be815c4c9081e88f0837825f90fd28317d4da54", size = 17234, upload-time = "2024-10-25T17:25:40.039Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c1/8b/5fe2cc11fee489817272089c4203e679c63b570a5aaeb18d852ae3cbba6a/et_xmlfile-2.0.0-py3-none-any.whl", hash = "sha256:7a91720bc756843502c3b7504c77b8fe44217c85c537d85037f0f536151b2caa", size = 18059, upload-time = "2024-10-25T17:25:39.051Z" },
+]
 [[package]]
 name = "exceptiongroup"
 version = "1.3.0"
     { url = "https://files.pythonhosted.org/packages/58/c1/dfb16b3432810fc9758564f9d1a4dbce6b93b7fb763ba57530c7fc48316d/openai-1.86.0-py3-none-any.whl", hash = "sha256:c8889c39410621fe955c230cc4c21bfe36ec887f4e60a957de05f507d7e1f349", size = 730296, upload-time = "2025-06-10T16:50:30.495Z" },
 ]
+[[package]]
+name = "openpyxl"
+version = "3.1.5"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "et-xmlfile" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/3d/f9/88d94a75de065ea32619465d2f77b29a0469500e99012523b91cc4141cd1/openpyxl-3.1.5.tar.gz", hash = "sha256:cf0e3cf56142039133628b5acffe8ef0c12bc902d2aadd3e0fe5878dc08d1050", size = 186464, upload-time = "2024-06-28T14:03:44.161Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/c0/da/977ded879c29cbd04de313843e76868e6e13408a94ed6b987245dc7c8506/openpyxl-3.1.5-py2.py3-none-any.whl", hash = "sha256:5282c12b107bffeef825f4617dc029afaf41d0ea60823bbb665ef3079dc79de2", size = 250910, upload-time = "2024-06-28T14:03:41.161Z" },
+]
 [[package]]
 name = "orjson"
 version = "3.10.18"
     { url = "https://files.pythonhosted.org/packages/05/e7/df2285f3d08fee213f2d041540fa4fc9ca6c2d44cf36d3a035bf2a8d2bcc/pyparsing-3.2.3-py3-none-any.whl", hash = "sha256:a749938e02d6fd0b59b356ca504a24982314bb090c383e3cf201c95ef7e2bfcf", size = 111120, upload-time = "2025-03-25T05:01:24.908Z" },
 ]
+[[package]]
+name = "pypdf"
+version = "5.6.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions", marker = "python_full_version < '3.11'" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/40/46/67de1d7a65412aa1c896e6b280829b70b57d203fadae6859b690006b8e0a/pypdf-5.6.0.tar.gz", hash = "sha256:a4b6538b77fc796622000db7127e4e58039ec5e6afd292f8e9bf42e2e985a749", size = 5023749, upload-time = "2025-06-01T12:19:40.101Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/71/8b/dc3a72d98c22be7a4cbd664ad14c5a3e6295c2dbdf572865ed61e24b5e38/pypdf-5.6.0-py3-none-any.whl", hash = "sha256:ca6bf446bfb0a2d8d71d6d6bb860798d864c36a29b3d9ae8d7fc7958c59f88e7", size = 304208, upload-time = "2025-06-01T12:19:38.003Z" },
+]
 [[package]]
 name = "pytest"
 version = "8.4.0"