File size: 7,897 Bytes
095d02f
3f771a9
edf3100
 
3f771a9
edf3100
3f771a9
 
 
 
 
 
 
 
edf3100
7da5655
 
edf3100
095d02f
3f771a9
095d02f
3f771a9
095d02f
3f771a9
7da5655
3f771a9
 
095d02f
 
 
edf3100
3f771a9
7da5655
3f771a9
095d02f
edf3100
 
 
3f771a9
edf3100
 
7da5655
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3f771a9
edf3100
3f771a9
 
 
 
 
 
 
 
 
edf3100
7da5655
 
 
 
3f771a9
7da5655
 
 
edf3100
7da5655
 
edf3100
 
7da5655
 
 
 
 
 
 
 
095d02f
3f771a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7da5655
3f771a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
edf3100
7da5655
 
 
 
 
3f771a9
7da5655
3f771a9
 
 
 
7da5655
 
 
 
 
 
3f771a9
7da5655
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.llms.gemini import Gemini
from llama_index.tools.arxiv import ArxivToolSpec
from llama_index.tools.wikipedia import WikipediaToolSpec
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
from llama_index.core.tools import FunctionTool
from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
from llama_index.llms.lmstudio import LMStudio
from llama_index.core.agent.workflow import (
    AgentStream,
    AgentOutput
)
from gradio import ChatMessage
from llama_index.core.base.llms.types import ChatMessage as llama_index_chat_message

from tools import interpret_python_math_code, image_understanding, convert_audio_to_text, video_understanding, read_csv_file, read_xlsx_file
from gaia_system_prompt import GAIA_SYSTEM_PROMPT, CUSTOM_SYSTEM_PROMPT

import os
import asyncio

# --- Runtime configuration --------------------------------------------------
TIMEOUT=180 # Timeout for agent execution in seconds
# Gemini API key is read from the environment; None when GEMINI_TOKEN is unset.
GEMINI_API_KEY = os.getenv("GEMINI_TOKEN")
# OpenAI-compatible endpoint for Google's Generative Language API
# (used by the commented-out OpenAI-style configuration).
GEMINI_OPENAI_API_DIR = "https://generativelanguage.googleapis.com/v1beta/openai/"
GEMINI_MODEL_NAME = "gemini-2.0-flash"
# Local model name served through LM Studio (alternative backend in FinalAgent).
LMSTUDIO_MODEL_NAME = "gemma-3-12B-it-qat-GGUF"
API_DIR = "http://host.docker.internal:1234/v1"  # LM Studio API URL

class FinalAgent:
    """Agent wrapping an LLM with math, media, file and web-search tools.

    Construct once, then ``await agent(question)`` to run the workflow and
    get back the text extracted from the model's ``<final_answer>`` tags.
    """

    def __init__(self):
        # LLM Initialization — alternative backends kept for quick switching:
        # self.llm = GoogleGenAI(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)
        # self.llm = LMStudio(model_name=LMSTUDIO_MODEL_NAME, base_url=API_DIR, request_timeout=180, temperature=0.1)
        self.llm = Gemini(model=GEMINI_MODEL_NAME, api_key=GEMINI_API_KEY)

        # Tool Initialization: local FunctionTools first, then tool specs
        # (arXiv, Wikipedia, DuckDuckGo) expanded to their tool lists.
        self.tools = [
            FunctionTool.from_defaults(
                fn=interpret_python_math_code,
                name="InterpretPythonMathCode",
                description="Interprets Python code for mathematical expressions."
            ),
            FunctionTool.from_defaults(
                fn=image_understanding,
                name="ImageUnderstanding",
                description="Analyzes an image and generates a response to a given question based on the image's content."
            ),
            FunctionTool.from_defaults(
                fn=convert_audio_to_text,
                name="ConvertAudioToText",
                description="Converts audio files to text using a speech-to-text model."
            ),
            FunctionTool.from_defaults(
                fn=video_understanding,
                name="VideoUnderstanding",
                description="Analyzes a video and generates a response to a given question based on the video's content."
            ),
            FunctionTool.from_defaults(
                fn=read_csv_file,
                name="ReadCSVFile",
                description="Reads a CSV file and returns its content as a string."
            ),
            FunctionTool.from_defaults(
                fn=read_xlsx_file,
                name="ReadXLSXFile",
                description="Reads an XLSX file and returns its content as a string."
            )
        ]
        self.tools.extend(ArxivToolSpec().to_tool_list())
        self.tools.extend(WikipediaToolSpec().to_tool_list())
        self.tools.extend(DuckDuckGoSearchToolSpec().to_tool_list())

        # Print the tools for debugging. Use the public `metadata` property
        # rather than the private `_metadata` attribute.
        print("Tools initialized:")
        for tool in self.tools:
            print(f"- {tool.metadata}")

        # Agent Workflow Initialization
        self.agent = AgentWorkflow.from_tools_or_functions(
            tools_or_functions=self.tools,
            llm=self.llm,
            system_prompt=CUSTOM_SYSTEM_PROMPT,
            timeout=TIMEOUT
        )

        # Alternative agent kept for quick switching:
        # self.agent = ReActAgent(
        #     llm=self.llm,
        #     verbose=True,
        #     max_iterations=5,
        #     system_prompt=CUSTOM_SYSTEM_PROMPT,
        #     tools=self.tools
        # )

        print("FinalAgent initialized.")

    @staticmethod
    def _coerce_to_str(obj) -> str:
        """Normalize the agent's response object to a plain string.

        Both gradio's ``ChatMessage`` and llama_index's ``ChatMessage``
        expose ``.role`` / ``.content``, so one branch handles both
        (previously two duplicated branches).
        """
        if isinstance(obj, (ChatMessage, llama_index_chat_message)):
            print(f"DEBUG: Response object is ChatMessage. Role: {obj.role}")
            content = obj.content
            if content is None:  # content may legitimately be None
                print("DEBUG: ChatMessage content is None, defaulting to empty string.")
                return ""
            return content
        if isinstance(obj, str):
            print("DEBUG: Response object is str.")
            return obj
        # Fallback if it's some other type
        print(f"Warning: Agent response was of unexpected type: {type(obj)}. Converting to string.")
        return str(obj)

    @staticmethod
    def _extract_final_answer(response_str: str) -> str:
        """Return the text between <final_answer>…</final_answer>, stripped.

        The closing tag is searched *after* the opening tag, so a stray
        closing tag earlier in the text cannot truncate the answer (the
        previous code used the first closing tag anywhere in the string).
        Returns the input unchanged when a well-ordered tag pair is absent.
        """
        open_tag, close_tag = "<final_answer>", "</final_answer>"
        start = response_str.find(open_tag)
        if start != -1:
            start += len(open_tag)
            end = response_str.find(close_tag, start)
            if end != -1:
                return response_str[start:end].strip()
        print("Warning: No <final_answer> tags found in the response.")
        return response_str

    async def __call__(self, question: str) -> str:
        """Run the agent on *question* and return the extracted final answer.

        Exceptions raised during the run are caught and reported in the
        returned string rather than propagated (best-effort behavior,
        as in the original implementation).
        """
        print(f"Agent received question: {question}")

        try:
            # AgentWorkflow.run is awaitable; the result's `.response` holds
            # the final message (type varies by backend — see _coerce_to_str).
            agent_chat_response = await self.agent.run(question)
            print(agent_chat_response)
            response_str = self._coerce_to_str(agent_chat_response.response)
        except Exception as e:
            print(f"Error during agent execution with LLM {self.llm.__class__.__name__}: {e}")
            # Depending on requirements, you might want to return an error message or re-raise
            response_str = f"Agent error: {e}"

        return self._extract_final_answer(response_str)
    

# async def main():
#     # Example usage
#     agent = FinalAgent()
#     question = "How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia."
#     answer = await agent(question)
#     print(f"Final answer: {answer}")

# if __name__ == "__main__":
#     asyncio.run(main())