Spaces:
Sleeping
Sleeping
| # import smolagents.models as sm_models | |
| # _orig_roles = sm_models.MessageRole.roles | |
| # @classmethod | |
| # def _roles_with_control(cls): | |
| # return _orig_roles() + ["control"] | |
| # sm_models.MessageRole.roles = _roles_with_control | |
| from smolagents import (CodeAgent, | |
| GradioUI, | |
| LiteLLMModel, | |
| OpenAIServerModel, | |
| ChatMessage, | |
| ToolCallingAgent) | |
| from smolagents.default_tools import (DuckDuckGoSearchTool, | |
| VisitWebpageTool, | |
| WikipediaSearchTool, | |
| SpeechToTextTool, | |
| PythonInterpreterTool) | |
| import yaml | |
| from tools.final_answer import FinalAnswerTool, check_reasoning, ensure_formatting | |
| from tools.tools import (youtube_frames_to_images, use_vision_model, | |
| read_file, download_file_from_url, | |
| extract_text_from_image, analyze_csv_file, | |
| analyze_excel_file, youtube_transcribe, | |
| transcribe_audio, review_youtube_video) | |
| import os | |
| from dotenv import load_dotenv | |
| import time | |
| load_dotenv() | |
# Load the agent prompt templates from the YAML file. The path is relative to
# the current working directory, so the script must be started from the
# directory that contains prompts.yaml.
# The encoding is pinned to UTF-8 so the file decodes the same way on every
# platform instead of depending on the locale's default encoding.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    # safe_load only builds plain Python objects (no arbitrary object construction).
    prompt_templates = yaml.safe_load(stream)
| # class ThinkingLiteLLMModel(LiteLLMModel): | |
| # def __init__(self, *args, **kwargs): | |
| # # ensure the Litellm client also maps "control" → "control" | |
| # cr = kwargs.pop("custom_role_conversions", {}) | |
| # cr["control"] = "control" | |
| # super().__init__(*args, custom_role_conversions=cr, **kwargs) | |
| # def __call__(self, messages, **kwargs) -> ChatMessage: | |
| # NOTE: content must be a list of {type, text} dicts | |
| # thinking_msg = { | |
| # "role": "control", | |
| # "content": [{"type": "text", "text": "thinking"}] | |
| # } | |
| # # prepend onto whatever messages the Agent built | |
| # return super().__call__([thinking_msg] + messages, **kwargs) | |
class SlowLiteLLMModel(LiteLLMModel):
    """LiteLLMModel variant that sleeps before every call.

    The pause acts as a simple client-side throttle between model requests
    (presumably to stay under a provider rate limit -- confirm with the
    deployment's quota).

    NOTE(review): the delay is applied in ``__call__`` only. If the installed
    smolagents version invokes another entry point (e.g. ``generate``) the
    delay would not apply -- verify against the version in use.
    """

    def __init__(self, *args, delay_seconds: float = 15, **kwargs):
        """Create the model.

        Args:
            *args: Positional arguments forwarded unchanged to ``LiteLLMModel``.
            delay_seconds: Seconds to sleep before each call. Keyword-only so
                it is never forwarded to the parent class; defaults to the
                previously hard-coded 15 seconds.
            **kwargs: Keyword arguments forwarded unchanged to ``LiteLLMModel``.

        Raises:
            ValueError: If ``delay_seconds`` is negative.
        """
        # Fail at construction time rather than on the first request:
        # time.sleep() itself rejects negative values.
        if delay_seconds < 0:
            raise ValueError("delay_seconds must be non-negative")
        super().__init__(*args, **kwargs)
        self.delay_seconds = delay_seconds

    def __call__(self, messages, **kwargs) -> ChatMessage:
        """Sleep for ``delay_seconds``, then delegate to ``LiteLLMModel.__call__``.

        Args:
            messages: Messages built by the agent; passed through unmodified.
            **kwargs: Extra call options; passed through unmodified.

        Returns:
            The ``ChatMessage`` produced by the parent class.
        """
        time.sleep(self.delay_seconds)
        return super().__call__(messages, **kwargs)
| # # search_model_name = 'granite3.3:latest' | |
| # search_model_name = 'cogito:14b' | |
| # # search_model_name = 'qwen2:7b' | |
| # search_model = ThinkingLiteLLMModel(model_id=f'ollama_chat/{search_model_name}', | |
| # flatten_messages_as_text=True) | |
| # web_agent = CodeAgent( | |
| # model=search_model, | |
| # tools=[DuckDuckGoSearchTool(), VisitWebpageTool(), FinalAnswerTool()], | |
| # max_steps=6, | |
| # verbosity_level=1, | |
| # grammar=None, | |
| # planning_interval=6, | |
| # name="web_agent", | |
| # description="Searches the web and reviews web pages to find information.", | |
| # additional_authorized_imports=['bs4', 'requests', 'io', 'wiki'], | |
| # prompt_templates=prompt_templates | |
| # ) | |
| # image_model_name = 'llama3.2-vision' | |
| # image_model = OpenAIServerModel(model_id=image_model_name, | |
| # api_base='http://localhost:11434/v1/', | |
| # api_key='ollama', | |
| # flatten_messages_as_text=False) | |
| # image_agent = ToolCallingAgent( | |
| # model=image_model, | |
| # tools=[FinalAnswerTool()], | |
| # max_steps=4, | |
| # verbosity_level=2, | |
| # grammar=None, | |
| # planning_interval=6, | |
| # #additional_authorized_imports=["PIL", "requests", "io", "numpy"], | |
| # name="image_agent", | |
| # description="Review images and videos for answers to questions based on visual data", | |
| # prompt_templates=prompt_templates | |
| # ) | |
| # react_model_name = 'qwen2:7b' | |
| # # Initialize the chat model | |
| # react_model = OpenAIServerModel(model_id=react_model_name, | |
| # api_base='http://localhost:11434/v1/', | |
| # api_key='ollama', | |
| # flatten_messages_as_text=False) | |
# Model that drives the manager agent, addressed through LiteLLM by its
# provider-prefixed model id. The API key is read from the GEMINI_KEY
# environment variable (None when the variable is unset).
react_model_name = "gemini/gemini-2.5-flash-preview-04-17"
react_model = LiteLLMModel(
    model_id=react_model_name,
    api_key=os.environ.get("GEMINI_KEY"),
    temperature=0.2,
)
# Tools handed to the manager agent: smolagents built-ins first, then the
# project-local helpers imported from tools.tools.
_manager_tools = [
    FinalAnswerTool(),
    DuckDuckGoSearchTool(),
    VisitWebpageTool(max_output_length=500000),
    WikipediaSearchTool(extract_format='HTML'),
    SpeechToTextTool(),
    youtube_frames_to_images,
    youtube_transcribe,
    use_vision_model,
    read_file,
    download_file_from_url,
    extract_text_from_image,
    analyze_csv_file,
    analyze_excel_file,
    transcribe_audio,
    review_youtube_video,
]

# Extra modules passed as additional_authorized_imports.
_manager_authorized_imports = [
    'os',
    'pandas',
    'numpy',
    'PIL',
    'tempfile',
    'PIL.Image',
]

# Single top-level agent; it has no managed sub-agents.
manager_agent = CodeAgent(
    name="Manager",
    description="The manager of the team, responsible for overseeing and guiding the team's work.",
    model=react_model,
    tools=_manager_tools,
    managed_agents=[],
    additional_authorized_imports=_manager_authorized_imports,
    prompt_templates=prompt_templates,
    final_answer_checks=[check_reasoning, ensure_formatting],
    max_steps=20,
    planning_interval=6,
    verbosity_level=1,
)
# Start the Gradio front end only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    ui = GradioUI(manager_agent)
    ui.launch()