# agent.py — FD900, commit f5eb447 (verified), 5.41 kB
# NOTE(review): the lines above the imports were web-page residue from the
# hosting site ("raw", "history blame", etc.) and broke the file as Python;
# converted to this comment so the module parses.
# import smolagents.models as sm_models
# _orig_roles = sm_models.MessageRole.roles
# @classmethod
# def _roles_with_control(cls):
# return _orig_roles() + ["control"]
# sm_models.MessageRole.roles = _roles_with_control
from smolagents import (CodeAgent,
GradioUI,
LiteLLMModel,
OpenAIServerModel,
ChatMessage,
ToolCallingAgent)
from smolagents.default_tools import (DuckDuckGoSearchTool,
VisitWebpageTool,
WikipediaSearchTool,
SpeechToTextTool,
PythonInterpreterTool)
import yaml
from tools.final_answer import FinalAnswerTool, check_reasoning, ensure_formatting
from tools.tools import (youtube_frames_to_images, use_vision_model,
read_file, download_file_from_url,
extract_text_from_image, analyze_csv_file,
analyze_excel_file, youtube_transcribe,
transcribe_audio, review_youtube_video)
import os
from dotenv import load_dotenv
import time
load_dotenv()
# Load prompts from YAML file
with open("prompts.yaml", 'r') as stream:
prompt_templates = yaml.safe_load(stream)
# class ThinkingLiteLLMModel(LiteLLMModel):
# def __init__(self, *args, **kwargs):
# # ensure the Litellm client also maps "control" → "control"
# cr = kwargs.pop("custom_role_conversions", {})
# cr["control"] = "control"
# super().__init__(*args, custom_role_conversions=cr, **kwargs)
# def __call__(self, messages, **kwargs) -> ChatMessage:
# NOTE: content must be a list of {type, text} dicts
# thinking_msg = {
# "role": "control",
# "content": [{"type": "text", "text": "thinking"}]
# }
# # prepend onto whatever messages the Agent built
# return super().__call__([thinking_msg] + messages, **kwargs)
class SlowLiteLLMModel(LiteLLMModel):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __call__(self, messages, **kwargs) -> ChatMessage:
time.sleep(15)
# prepend onto whatever messages the Agent built
return super().__call__(messages, **kwargs)
# # search_model_name = 'granite3.3:latest'
# search_model_name = 'cogito:14b'
# # search_model_name = 'qwen2:7b'
# search_model = ThinkingLiteLLMModel(model_id=f'ollama_chat/{search_model_name}',
# flatten_messages_as_text=True)
# web_agent = CodeAgent(
# model=search_model,
# tools=[DuckDuckGoSearchTool(), VisitWebpageTool(), FinalAnswerTool()],
# max_steps=6,
# verbosity_level=1,
# grammar=None,
# planning_interval=6,
# name="web_agent",
# description="Searches the web using the and reviews web pages to find information.",
# additional_authorized_imports=['bs4', 'requests', 'io', 'wiki'],
# prompt_templates=prompt_templates
# )
# image_model_name = 'llama3.2-vision'
# image_model = OpenAIServerModel(model_id=image_model_name,
# api_base='http://localhost:11434/v1/',
# api_key='ollama',
# flatten_messages_as_text=False)
# image_agent = ToolCallingAgent(
# model=image_model,
# tools=[FinalAnswerTool()],
# max_steps=4,
# verbosity_level=2,
# grammar=None,
# planning_interval=6,
# #additional_authorized_imports=["PIL", "requests", "io", "numpy"],
# name="image_agent",
# description="Review images and videos for answers to questions based on visual data",
# prompt_templates=prompt_templates
# )
# react_model_name = 'qwen2:7b'
# # Initialize the chat model
# react_model = OpenAIServerModel(model_id=react_model_name,
# api_base='http://localhost:11434/v1/',
# api_key='ollama',
# flatten_messages_as_text=False)
react_model_name = "gemini/gemini-2.5-flash-preview-04-17"
react_model = LiteLLMModel(model_id=react_model_name,
api_key=os.getenv("GEMINI_KEY"),
temperature=0.2
)
manager_agent = CodeAgent(
model=react_model,
tools=[FinalAnswerTool(),
DuckDuckGoSearchTool(),
VisitWebpageTool(max_output_length=500000),
WikipediaSearchTool(extract_format='HTML'),
SpeechToTextTool(),
youtube_frames_to_images,
youtube_transcribe,
use_vision_model,
read_file, download_file_from_url,
extract_text_from_image,
analyze_csv_file, analyze_excel_file,
transcribe_audio,
review_youtube_video
],
managed_agents=[],
additional_authorized_imports=['os', 'pandas', 'numpy', 'PIL', 'tempfile', 'PIL.Image'],
max_steps=20,
verbosity_level=1,
planning_interval=6,
name="Manager",
description="The manager of the team, responsible for overseeing and guiding the team's work.",
final_answer_checks=[check_reasoning, ensure_formatting],
prompt_templates=prompt_templates
)
if __name__ == "__main__":
GradioUI(manager_agent).launch()