# agent.py — FD900, commit f5eb447 (verified), 5.41 kB
# NOTE(review): the lines above the imports were web-page residue from the
# hosting site ("raw", "history blame", etc.) and broke the file as Python;
# converted to this comment so the module parses.
# import smolagents.models as sm_models
# _orig_roles = sm_models.MessageRole.roles
# @classmethod
# def _roles_with_control(cls):
# return _orig_roles() + ["control"]
# sm_models.MessageRole.roles = _roles_with_control
from smolagents import (CodeAgent,
GradioUI,
LiteLLMModel,
OpenAIServerModel,
ChatMessage,
ToolCallingAgent)
from smolagents.default_tools import (DuckDuckGoSearchTool,
VisitWebpageTool,
WikipediaSearchTool,
SpeechToTextTool,
PythonInterpreterTool)
import yaml
from tools.final_answer import FinalAnswerTool, check_reasoning, ensure_formatting
from tools.tools import (youtube_frames_to_images, use_vision_model,
read_file, download_file_from_url,
extract_text_from_image, analyze_csv_file,
analyze_excel_file, youtube_transcribe,
transcribe_audio, review_youtube_video)
import os
from dotenv import load_dotenv
import time
load_dotenv()
# Load prompts from YAML file
with open("prompts.yaml", 'r') as stream:
prompt_templates = yaml.safe_load(stream)
# class ThinkingLiteLLMModel(LiteLLMModel):
# def __init__(self, *args, **kwargs):
# # ensure the Litellm client also maps "control" → "control"
# cr = kwargs.pop("custom_role_conversions", {})
# cr["control"] = "control"
# super().__init__(*args, custom_role_conversions=cr, **kwargs)
# def __call__(self, messages, **kwargs) -> ChatMessage:
# NOTE: content must be a list of {type, text} dicts
# thinking_msg = {
# "role": "control",
# "content": [{"type": "text", "text": "thinking"}]
# }
# # prepend onto whatever messages the Agent built
# return super().__call__([thinking_msg] + messages, **kwargs)
class SlowLiteLLMModel(LiteLLMModel):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
def __call__(self, messages, **kwargs) -> ChatMessage:
time.sleep(15)
# prepend onto whatever messages the Agent built
return super().__call__(messages, **kwargs)
# # search_model_name = 'granite3.3:latest'
# search_model_name = 'cogito:14b'
# # search_model_name = 'qwen2:7b'
# search_model = ThinkingLiteLLMModel(model_id=f'ollama_chat/{search_model_name}',
# flatten_messages_as_text=True)
# web_agent = CodeAgent(
# model=search_model,
# tools=[DuckDuckGoSearchTool(), VisitWebpageTool(), FinalAnswerTool()],
# max_steps=6,
# verbosity_level=1,
# grammar=None,
# planning_interval=6,
# name="web_agent",
# description="Searches the web using the and reviews web pages to find information.",
# additional_authorized_imports=['bs4', 'requests', 'io', 'wiki'],
# prompt_templates=prompt_templates
# )
# image_model_name = 'llama3.2-vision'
# image_model = OpenAIServerModel(model_id=image_model_name,
# api_base='http://localhost:11434/v1/',
# api_key='ollama',
# flatten_messages_as_text=False)
# image_agent = ToolCallingAgent(
# model=image_model,
# tools=[FinalAnswerTool()],
# max_steps=4,
# verbosity_level=2,
# grammar=None,
# planning_interval=6,
# #additional_authorized_imports=["PIL", "requests", "io", "numpy"],
# name="image_agent",
# description="Review images and videos for answers to questions based on visual data",
# prompt_templates=prompt_templates
# )
# react_model_name = 'qwen2:7b'
# # Initialize the chat model
# react_model = OpenAIServerModel(model_id=react_model_name,
# api_base='http://localhost:11434/v1/',
# api_key='ollama',
# flatten_messages_as_text=False)
react_model_name = "gemini/gemini-2.5-flash-preview-04-17"
react_model = LiteLLMModel(model_id=react_model_name,
api_key=os.getenv("GEMINI_KEY"),
temperature=0.2
)
manager_agent = CodeAgent(
model=react_model,
tools=[FinalAnswerTool(),
DuckDuckGoSearchTool(),
VisitWebpageTool(max_output_length=500000),
WikipediaSearchTool(extract_format='HTML'),
SpeechToTextTool(),
youtube_frames_to_images,
youtube_transcribe,
use_vision_model,
read_file, download_file_from_url,
extract_text_from_image,
analyze_csv_file, analyze_excel_file,
transcribe_audio,
review_youtube_video
],
managed_agents=[],
additional_authorized_imports=['os', 'pandas', 'numpy', 'PIL', 'tempfile', 'PIL.Image'],
max_steps=20,
verbosity_level=1,
planning_interval=6,
name="Manager",
description="The manager of the team, responsible for overseeing and guiding the team's work.",
final_answer_checks=[check_reasoning, ensure_formatting],
prompt_templates=prompt_templates
)
if __name__ == "__main__":
GradioUI(manager_agent).launch()