# Final_Assignment_Template
#
# Sleeping
#
# File size: 5,414 Bytes

# import smolagents.models as sm_models

# _orig_roles = sm_models.MessageRole.roles

# @classmethod
# def _roles_with_control(cls):
#     return _orig_roles() + ["control"]

# sm_models.MessageRole.roles = _roles_with_control



from smolagents import (CodeAgent, 
                        GradioUI, 
                        LiteLLMModel, 
                        OpenAIServerModel, 
                        ChatMessage, 
                        ToolCallingAgent)
from smolagents.default_tools import (DuckDuckGoSearchTool, 
                                      VisitWebpageTool, 
                                      WikipediaSearchTool, 
                                      SpeechToTextTool,
                                      PythonInterpreterTool)
import yaml
from tools.final_answer import FinalAnswerTool, check_reasoning, ensure_formatting
from tools.tools import (youtube_frames_to_images, use_vision_model, 
                         read_file, download_file_from_url, 
                         extract_text_from_image, analyze_csv_file, 
                         analyze_excel_file, youtube_transcribe,
                         transcribe_audio, review_youtube_video)
import os
from dotenv import load_dotenv
import time

# Load variables from a local .env file (if present) into the process
# environment; GEMINI_KEY is read from the environment further down.
load_dotenv()

# Load the agent prompt templates from prompts.yaml (resolved relative to the
# current working directory). The encoding is pinned to UTF-8 so the file is
# decoded the same way regardless of the platform's locale default.
with open("prompts.yaml", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)


# class ThinkingLiteLLMModel(LiteLLMModel):
#     def __init__(self, *args, **kwargs):
#         # ensure the Litellm client also maps "control" → "control"
#         cr = kwargs.pop("custom_role_conversions", {})
#         cr["control"] = "control"
#         super().__init__(*args, custom_role_conversions=cr, **kwargs)

#     def __call__(self, messages, **kwargs) -> ChatMessage:
#         # NOTE: content must be a list of {type, text} dicts
#         thinking_msg = {
#             "role": "control",
#             "content": [{"type": "text", "text": "thinking"}]
#         }
#         # prepend onto whatever messages the Agent built
#         return super().__call__([thinking_msg] + messages, **kwargs)
    
class SlowLiteLLMModel(LiteLLMModel):
    """LiteLLMModel variant that sleeps before every model call.

    The pause is applied unconditionally on each ``__call__``; the messages
    and keyword arguments are forwarded to ``LiteLLMModel`` unchanged.
    NOTE(review): presumably used to stay under a provider rate limit --
    confirm with the author.

    Args:
        *args: Positional arguments forwarded to ``LiteLLMModel.__init__``.
        delay_seconds: Seconds to sleep before each call. Keyword-only;
            defaults to 15, the value that was previously hard-coded.
        **kwargs: Keyword arguments forwarded to ``LiteLLMModel.__init__``.

    Raises:
        ValueError: If ``delay_seconds`` is negative.
    """

    DEFAULT_DELAY_SECONDS = 15

    def __init__(self, *args, delay_seconds: float = DEFAULT_DELAY_SECONDS, **kwargs):
        # Reject a bad delay up front instead of failing on the first call,
        # where time.sleep() would raise for a negative value.
        if delay_seconds < 0:
            raise ValueError("delay_seconds must be non-negative")
        super().__init__(*args, **kwargs)
        self.delay_seconds = delay_seconds

    def __call__(self, messages, **kwargs) -> ChatMessage:
        """Sleep for ``delay_seconds``, then delegate to the parent model."""
        time.sleep(self.delay_seconds)
        return super().__call__(messages, **kwargs)

# # search_model_name = 'granite3.3:latest'
# search_model_name = 'cogito:14b'
# # search_model_name = 'qwen2:7b'
# search_model = ThinkingLiteLLMModel(model_id=f'ollama_chat/{search_model_name}',
#                              flatten_messages_as_text=True)

# web_agent = CodeAgent(
#     model=search_model,
#     tools=[DuckDuckGoSearchTool(), VisitWebpageTool(), FinalAnswerTool()],
#     max_steps=6,
#     verbosity_level=1,
#     grammar=None,
#     planning_interval=6,
#     name="web_agent",
#     description="Searches the web using the and reviews web pages to find information.",
#     additional_authorized_imports=['bs4', 'requests', 'io', 'wiki'],
#     prompt_templates=prompt_templates
# )

# image_model_name = 'llama3.2-vision'
# image_model = OpenAIServerModel(model_id=image_model_name,
#                                 api_base='http://localhost:11434/v1/',
#                                 api_key='ollama',
#                             flatten_messages_as_text=False)
# image_agent = ToolCallingAgent(
#     model=image_model,
#     tools=[FinalAnswerTool()],
#     max_steps=4,
#     verbosity_level=2,
#     grammar=None,
#     planning_interval=6,
#     #additional_authorized_imports=["PIL", "requests", "io", "numpy"],
#     name="image_agent",
#     description="Review images and videos for answers to questions based on visual data",
#     prompt_templates=prompt_templates
# )

# react_model_name = 'qwen2:7b'
# # Initialize the chat model
# react_model = OpenAIServerModel(model_id=react_model_name,
#                                 api_base='http://localhost:11434/v1/',
#                                 api_key='ollama',
#                             flatten_messages_as_text=False)

# Model that drives the manager agent, addressed through LiteLLM.
# The API key is taken from the GEMINI_KEY environment variable and is
# passed as None when that variable is unset.
react_model_name = "gemini/gemini-2.5-flash-preview-04-17"
react_model = LiteLLMModel(
    model_id=react_model_name,
    api_key=os.environ.get("GEMINI_KEY"),
    temperature=0.2,
)


# The single agent exposed by this script. It runs on `react_model`, uses the
# prompt templates loaded from prompts.yaml, and has no managed sub-agents.
manager_agent = CodeAgent(
    name="Manager",
    description="The manager of the team, responsible for overseeing and guiding the team's work.",
    model=react_model,
    prompt_templates=prompt_templates,
    tools=[
        # Final-answer tool from tools/final_answer.py.
        FinalAnswerTool(),
        # Tools shipped with smolagents (smolagents.default_tools).
        DuckDuckGoSearchTool(),
        VisitWebpageTool(max_output_length=500000),
        WikipediaSearchTool(extract_format="HTML"),
        SpeechToTextTool(),
        # Project-local tools from tools/tools.py.
        youtube_frames_to_images,
        youtube_transcribe,
        use_vision_model,
        read_file,
        download_file_from_url,
        extract_text_from_image,
        analyze_csv_file,
        analyze_excel_file,
        transcribe_audio,
        review_youtube_video,
    ],
    managed_agents=[],
    # Extra modules passed to the agent as authorized imports.
    additional_authorized_imports=[
        "os",
        "pandas",
        "numpy",
        "PIL",
        "tempfile",
        "PIL.Image",
    ],
    max_steps=20,
    planning_interval=6,
    verbosity_level=1,
    # Callables handed to the agent as checks on the final answer.
    final_answer_checks=[check_reasoning, ensure_formatting],
)



if __name__ == "__main__":
    # Only start the Gradio interface when executed as a script, so that
    # importing this module just builds `manager_agent`.
    ui = GradioUI(manager_agent)
    ui.launch()