import math
from typing import Optional, Tuple, Literal

from smolagents import tool


@tool
def extract_text_from_audio(file_path: str) -> str:
    """Given a path to an audio file, extract and return the text contained in it as a string.

    Args:
        file_path: Path to the audio file to transcribe.

    Returns:
        The text recognized in the audio file.
    """
    # Imported lazily so the dependency is only needed when the tool runs.
    import speech_recognition as sr

    recognizer = sr.Recognizer()
    with sr.AudioFile(file_path) as source:
        # Load the whole recording into memory.
        audio_data = recognizer.record(source)
    # Convert the recorded speech to text.
    return recognizer.recognize_google(audio_data)


class TestAgent:
    """Callable wrapper around a smolagents ``CodeAgent`` with a fixed tool set.

    The agent is given the homemade audio transcription tool, DuckDuckGo
    search, a web page visitor, a Wikipedia tool loaded through LangChain,
    and the final-answer tool. Calling the instance with a question runs the
    agent and returns its answer.
    """

    def __init__(self):
        """Build the model, the tools and the underlying ``CodeAgent``."""
        # Additional tools come from LangChain:
        # https://docs.langchain.com/oss/python/integrations/tools
        from langchain_community.agent_toolkits.load_tools import load_tools
        from smolagents import (
            CodeAgent,
            DuckDuckGoSearchTool,
            FinalAnswerTool,
            OpenAIServerModel,
            Tool,
            VisitWebpageTool,
        )

        wikipedia_tool = Tool.from_langchain(load_tools(["wikipedia"])[0])
        # NOTE(review): this sets the attribute on the smolagents wrapper;
        # confirm it actually reaches the underlying LangChain Wikipedia
        # wrapper, otherwise the result limit is not applied.
        wikipedia_tool.top_k_results = 3

        # TODO: optionally add tools served by an MCP server
        # (https://github.com/mcp), e.g. a YouTube QA server.

        model = OpenAIServerModel(model_id="gpt-4o")

        # Instantiate the agent.
        self.agent = CodeAgent(
            tools=[
                extract_text_from_audio,  # homemade tool
                DuckDuckGoSearchTool(),  # basic tools from smolagents
                VisitWebpageTool(),
                wikipedia_tool,  # tool from LangChain with extra parameters
                FinalAnswerTool(),
            ],
            additional_authorized_imports=["pandas", "markdownify", "requests"],
            model=model,
            max_steps=3,
            verbosity_level=2,
            use_structured_outputs_internally=True,  # V3. Adds structure
        )

        # V3. Append an extra guidance rule to the default system prompt.
        prompt_for_guidance = (
            "\n10. Provide the answer exactly as it is asked, "
            "be concise and precise\n\nNow Begin!"
        )
        self.agent.prompt_templates['system_prompt'] = (
            self.agent.prompt_templates['system_prompt'] + prompt_for_guidance
        )

    def __call__(self, question: str) -> str:
        """Run the agent on ``question`` and return its answer.

        Args:
            question: The question passed to the agent.

        Returns:
            Whatever ``self.agent.run`` produces for the question.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = self.agent.run(question)
        print(f"Agent returning his answer: {answer}")
        return answer