# import smolagents.models as sm_models
# _orig_roles = sm_models.MessageRole.roles
# @classmethod
# def _roles_with_control(cls):
# return _orig_roles() + ["control"]
# sm_models.MessageRole.roles = _roles_with_control
from smolagents import (CodeAgent,
GradioUI,
LiteLLMModel,
OpenAIServerModel,
ChatMessage,
ToolCallingAgent)
from smolagents.default_tools import (DuckDuckGoSearchTool,
VisitWebpageTool,
WikipediaSearchTool,
SpeechToTextTool,
PythonInterpreterTool)
import yaml
from tools.final_answer import FinalAnswerTool, check_reasoning, ensure_formatting
from tools.tools import (youtube_frames_to_images, use_vision_model,
read_file, download_file_from_url,
extract_text_from_image, analyze_csv_file,
analyze_excel_file, youtube_transcribe,
transcribe_audio, review_youtube_video)
import os
from dotenv import load_dotenv
import time
# Read variables from a local .env file into the process environment before
# anything below looks up os.getenv(...).
load_dotenv()

# Load the agent prompt templates from the YAML file in the working directory.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("prompts.yaml", "r", encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)
# class ThinkingLiteLLMModel(LiteLLMModel):
# def __init__(self, *args, **kwargs):
# # ensure the Litellm client also maps "control" → "control"
# cr = kwargs.pop("custom_role_conversions", {})
# cr["control"] = "control"
# super().__init__(*args, custom_role_conversions=cr, **kwargs)
# def __call__(self, messages, **kwargs) -> ChatMessage:
# NOTE: content must be a list of {type, text} dicts
# thinking_msg = {
# "role": "control",
# "content": [{"type": "text", "text": "thinking"}]
# }
# # prepend onto whatever messages the Agent built
# return super().__call__([thinking_msg] + messages, **kwargs)
class SlowLiteLLMModel(LiteLLMModel):
    """LiteLLMModel variant that pauses before every model call.

    Each invocation sleeps for ``delay_seconds`` and then delegates to
    ``LiteLLMModel.__call__`` with the messages passed through unchanged.
    NOTE(review): presumably used to throttle requests to the provider --
    confirm the intended use with the author.

    Args:
        *args: Positional arguments forwarded to ``LiteLLMModel.__init__``.
        delay_seconds: Seconds to sleep before each call. Keyword-only;
            defaults to 15, the value that was previously hard-coded.
        **kwargs: Keyword arguments forwarded to ``LiteLLMModel.__init__``.

    Raises:
        ValueError: If ``delay_seconds`` is negative.
    """

    def __init__(self, *args, delay_seconds: float = 15, **kwargs):
        if delay_seconds < 0:
            # Fail at construction time rather than on the first model call,
            # where time.sleep() would raise for a negative value.
            raise ValueError("delay_seconds must be non-negative")
        # delay_seconds is consumed here and deliberately not forwarded, so
        # the parent class never sees an argument it does not know about.
        super().__init__(*args, **kwargs)
        self.delay_seconds = delay_seconds

    def __call__(self, messages, **kwargs) -> ChatMessage:
        """Sleep for ``delay_seconds``, then run the wrapped model call."""
        time.sleep(self.delay_seconds)
        # Messages are forwarded exactly as the agent built them.
        return super().__call__(messages, **kwargs)
# # search_model_name = 'granite3.3:latest'
# search_model_name = 'cogito:14b'
# # search_model_name = 'qwen2:7b'
# search_model = ThinkingLiteLLMModel(model_id=f'ollama_chat/{search_model_name}',
# flatten_messages_as_text=True)
# web_agent = CodeAgent(
# model=search_model,
# tools=[DuckDuckGoSearchTool(), VisitWebpageTool(), FinalAnswerTool()],
# max_steps=6,
# verbosity_level=1,
# grammar=None,
# planning_interval=6,
# name="web_agent",
# description="Searches the web and reviews web pages to find information.",
# additional_authorized_imports=['bs4', 'requests', 'io', 'wiki'],
# prompt_templates=prompt_templates
# )
# image_model_name = 'llama3.2-vision'
# image_model = OpenAIServerModel(model_id=image_model_name,
# api_base='http://localhost:11434/v1/',
# api_key='ollama',
# flatten_messages_as_text=False)
# image_agent = ToolCallingAgent(
# model=image_model,
# tools=[FinalAnswerTool()],
# max_steps=4,
# verbosity_level=2,
# grammar=None,
# planning_interval=6,
# #additional_authorized_imports=["PIL", "requests", "io", "numpy"],
# name="image_agent",
# description="Review images and videos for answers to questions based on visual data",
# prompt_templates=prompt_templates
# )
# react_model_name = 'qwen2:7b'
# # Initialize the chat model
# react_model = OpenAIServerModel(model_id=react_model_name,
# api_base='http://localhost:11434/v1/',
# api_key='ollama',
# flatten_messages_as_text=False)
# Model used by the manager agent: a Gemini model addressed through
# LiteLLMModel, with a low sampling temperature.
react_model_name = "gemini/gemini-2.5-flash-preview-04-17"
react_model = LiteLLMModel(
    model_id=react_model_name,
    # The API key is read from the environment; this is None if GEMINI_KEY
    # is not set.
    api_key=os.environ.get("GEMINI_KEY"),
    temperature=0.2,
)
# The single top-level agent of this app: a CodeAgent that owns every tool
# directly and has no managed sub-agents.
manager_agent = CodeAgent(
    model=react_model,
    tools=[
        # Final-answer tool from the local tools package.
        FinalAnswerTool(),
        # Built-in smolagents tools.
        DuckDuckGoSearchTool(),
        VisitWebpageTool(max_output_length=500000),
        WikipediaSearchTool(extract_format="HTML"),
        SpeechToTextTool(),
        # Project tools imported from tools.tools.
        youtube_frames_to_images,
        youtube_transcribe,
        use_vision_model,
        read_file,
        download_file_from_url,
        extract_text_from_image,
        analyze_csv_file,
        analyze_excel_file,
        transcribe_audio,
        review_youtube_video,
    ],
    managed_agents=[],
    # Modules the agent-generated code is allowed to import.
    additional_authorized_imports=[
        "os",
        "pandas",
        "numpy",
        "PIL",
        "tempfile",
        "PIL.Image",
    ],
    max_steps=20,
    verbosity_level=1,
    planning_interval=6,
    name="Manager",
    description="The manager of the team, responsible for overseeing and guiding the team's work.",
    # Callables from tools.final_answer passed as final-answer checks.
    final_answer_checks=[check_reasoning, ensure_formatting],
    prompt_templates=prompt_templates,
)
if __name__ == "__main__":
    # Start the Gradio UI for the manager agent only when this file is run
    # as a script, not when it is imported.
    GradioUI(manager_agent).launch()