File size: 4,269 Bytes
03cd67b
 
8abd124
c984898
03cd67b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
820aa0b
04e04c8
03cd67b
efdee21
03cd67b
 
c984898
03cd67b
 
 
 
 
 
 
 
 
 
 
 
 
3c965e9
 
8abd124
e93d4ea
 
8abd124
3c965e9
 
03cd67b
 
 
 
8abd124
28040a7
03cd67b
 
8abd124
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import os
from typing import TypedDict, List, Dict, Any, Optional
from langchain.agents import create_tool_calling_agent, AgentExecutor
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage
from langchain_core.prompts import ChatPromptTemplate

# 1. Web Browsing
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_community.document_loaders import ImageCaptionLoader
import requests
import pandas as pd
from pypdf import PdfReader

@tool
def web_search(query: str) -> str:
    """Search the web through DuckDuckGo and return the result text.
    Args:
        query: what you want to search
    """
    search = DuckDuckGoSearchRun()
    # DuckDuckGoSearchRun yields one already-formatted string; joining it with
    # "\n" would insert a newline between every single character.
    results = search.invoke(query)
    return results

@tool
def visit_webpage(url: str) -> str:
    """Fetches raw HTML content of a web page.
    Args:
        url: the webpage url
    """
    # Best-effort fetch: any failure is reported back as text rather than
    # raised, so the caller always receives a string.
    try:
        page_body = requests.get(url, timeout=5).text
    except Exception as exc:
        return f"[ERROR fetching {url}]: {exc}"
    return page_body

# 4. File Reading
@tool
def read_file(dir: str) -> str:
    """Read the content of the provided file and return it as text.

    Excel workbooks (.xlsx) are rendered as a table, PDF files (.pdf) as the
    extracted text of every page, and anything else is read as plain text.
    Args:
        dir: the filepath
    """
    # `str.split` must be *called*; the original subscripted the method
    # (`dir.split['.']`), which raised TypeError on every invocation.
    # splitext also copes with dots in directory names, and lower() makes
    # "REPORT.PDF" behave like "report.pdf".
    extension = os.path.splitext(dir)[1].lower().lstrip(".")
    if extension == "xlsx":
        dataframe = pd.read_excel(dir)
        return dataframe.to_string()
    if extension == "pdf":
        reader = PdfReader(dir)
        # extract_text() may yield nothing for image-only pages; substitute
        # an empty string so the join never sees a non-string value.
        contents = [page.extract_text() or "" for page in reader.pages]
        return "\n".join(contents)
    # Plain-text fallback: decode explicitly instead of relying on the
    # platform default, and replace undecodable bytes rather than crash.
    with open(dir, encoding="utf-8", errors="replace") as f:
        return f.read()

# 5. Image Open
@tool
def image_caption(dir: str) -> str:
    """Understand the content of the provided image
    Args:
        dir: the image url link
    """
    # Load a single-image batch and hand back the text of its only document.
    documents = ImageCaptionLoader(images=[dir]).load()
    first_document = documents[0]
    return first_document.page_content

# 2. Coding
# 3. Multi-Modality

# ("human", f"Question: {question}\nReport to validate: {final_answer}")
class BasicAgent:
    """Tool-calling question-answering agent backed by a Gemini chat model.

    Instances are callable: ``agent(question)`` runs the agent executor with
    the tools ``web_search``, ``visit_webpage``, ``read_file`` and
    ``image_caption`` and returns the agent's final text output.
    """

    def __init__(self):
        """Build the model, prompt, agent and executor.

        Raises:
            RuntimeError: if the ``GOOGLE_API_KEY`` environment variable is
                not set.
        """
        # Credentials must never live in source control; read the key from
        # the environment instead of embedding it in the file.
        api_key = os.environ.get("GOOGLE_API_KEY")
        if not api_key:
            raise RuntimeError(
                "GOOGLE_API_KEY environment variable is not set; "
                "it is required to call the Gemini API."
            )
        self.model = ChatGoogleGenerativeAI(
            model="gemini-2.0-flash",
            temperature=0,
            max_tokens=1024,
            timeout=None,
            max_retries=2,
            google_api_key=api_key,
        )
        # System prompt describing the required answer format. The original
        # literal opened with four quotes, which put a stray '"' at the very
        # start of the prompt text.
        self.sys_prompt = """
                You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
                YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
                If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
                If you are asked for a string, don't use articles, neither abbreviations (eg. for cities), and write the digits in plain text unless specified otherwise.
                If you are asked for a comma separated list, apply the above rules depending of whether the element to put in the list is a number or a string.

                There are few tools provided: web_search, visit_webpage, read_file and image_caption. 
                Here are few examples demonstrating how to call and use the tools.
        """
        self.tools = [web_search, visit_webpage, read_file, image_caption]
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", self.sys_prompt),
            ("human", "{input}"),
            # Slot where the executor injects intermediate tool calls/results.
            ("placeholder", "{agent_scratchpad}"),
        ])
        self.agent = create_tool_calling_agent(self.model, self.tools, self.prompt)
        self.agent_exe = AgentExecutor(agent=self.agent, tools=self.tools, verbose=True)
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its final output.

        Args:
            question: the user question forwarded as the ``input`` variable.

        Returns:
            The value stored under the ``"output"`` key of the executor's
            result.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        response = self.agent_exe.invoke({"input": question})
        # The executor result is a dict whose final answer lives under
        # "output"; there is no "message" key, so the old lookup raised
        # KeyError on every call.
        fixed_answer = response["output"]
        print(f"Agent returning fixed answer: {fixed_answer}")
        return fixed_answer