Final_Assignment_Template

Paused

AlexanderKazakov

gpt-4.1, adjusted prompt, adjusted visit_webpage output size

a31ddf7 11 months ago

5.49 kB

	import json
	import os
	import re
	import time

	import yaml
	from duckduckgo_search.exceptions import DuckDuckGoSearchException
	from smolagents import FinalAnswerTool, Tool, OpenAIServerModel, CodeAgent


	# Single length limit (10**6) used in two places in this file: it is the
	# max_length handed to truncate_content() in CustomVisitWebpageTool.forward,
	# and the max_print_outputs_length given to CodeAgent in SmolAgent.__init__.
	python_interpreter_max_print_outputs_length = 10**6


	class CustomDuckDuckGoSearchTool(Tool):
	    """DuckDuckGo-backed ``web_search`` tool that retries failed searches."""

	    name = "web_search"
	    description = """Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."""
	    inputs = {"query": {"type": "string", "description": "The search query to perform."}}
	    output_type = "string"

	    def __init__(self, max_results=10, **kwargs):
	        """Set up the search client.

	        Args:
	            max_results: Passed as ``max_results`` to every search call.
	            **kwargs: Forwarded unchanged to the ``DDGS`` constructor.

	        Raises:
	            ImportError: If ``duckduckgo_search`` cannot be imported.
	        """
	        super().__init__()
	        self.max_results = max_results
	        try:
	            from duckduckgo_search import DDGS
	        except ImportError as e:
	            raise ImportError(
	                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
	            ) from e
	        self.ddgs = DDGS(**kwargs)

	    def forward(self, query: str) -> str:
	        """Run the search and format the hits as a markdown string.

	        The search is attempted up to five times; after each failed attempt
	        except the last, the error is printed and the call sleeps 1.5 seconds.

	        Args:
	            query: The search query to perform.

	        Returns:
	            A "## Search Results" heading followed by one
	            ``[title](href)`` + body entry per hit.

	        Raises:
	            DuckDuckGoSearchException: If the final attempt also fails.
	            Exception: If the search returns no results.
	        """
	        max_attempts = 5
	        for attempt in range(1, max_attempts + 1):
	            try:
	                hits = self.ddgs.text(query, max_results=self.max_results)
	            except DuckDuckGoSearchException as err:
	                print(err)
	                if attempt == max_attempts:
	                    # Out of retries: propagate the last search error.
	                    raise
	                time.sleep(1.5)
	            else:
	                break

	        if len(hits) == 0:
	            raise Exception("No results found! Try a less restrictive/shorter query.")

	        entries = []
	        for hit in hits:
	            entries.append(f"[{hit['title']}]({hit['href']})\n{hit['body']}")
	        return "## Search Results\n\n" + "\n\n".join(entries)


	class CustomVisitWebpageTool(Tool):
	    """``visit_webpage`` tool: fetches a URL and returns its content as markdown."""

	    name = "visit_webpage"
	    description = (
	        "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
	    )
	    inputs = {
	        "url": {
	            "type": "string",
	            "description": "The url of the webpage to visit.",
	        }
	    }
	    output_type = "string"

	    def forward(self, url: str) -> str:
	        """Download ``url`` and return its markdown rendering.

	        Fetch and conversion failures are not raised; they are reported
	        through the returned string instead.

	        Args:
	            url: The url of the webpage to visit.

	        Returns:
	            The page converted to markdown, with runs of three or more
	            newlines collapsed to two and the result passed through
	            ``truncate_content``; or an error message string.

	        Raises:
	            ImportError: If one of the lazily imported packages is missing.
	        """
	        try:
	            import requests
	            from markdownify import markdownify
	            from requests.exceptions import RequestException

	            from smolagents.utils import truncate_content
	        except ImportError as e:
	            raise ImportError(
	                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
	            ) from e

	        try:
	            # GET with a 20-second timeout; non-success status codes raise.
	            page = requests.get(url, timeout=20)
	            page.raise_for_status()

	            # HTML -> markdown, then squeeze runs of blank lines.
	            collapsed = re.sub(r"\n{3,}", "\n\n", markdownify(page.text).strip())

	            return truncate_content(collapsed, python_interpreter_max_print_outputs_length)
	        except requests.exceptions.Timeout:
	            return "The request timed out. Please try again later or check the URL."
	        except RequestException as fetch_error:
	            return f"Error fetching the webpage: {fetch_error!s}"
	        except Exception as other_error:
	            return f"An unexpected error occurred: {other_error!s}"


	class SmolAgent:
	    """Wrapper around a smolagents ``CodeAgent`` with web search and page-visit tools.

	    Prompt templates are loaded from ``prompt_templates.yaml`` and the
	    ``system_prompt`` entry is replaced by the contents of ``system_prompt.txt``.
	    """

	    def __init__(self, openai_api_key=None):
	        """Build the model, the tools and the underlying ``CodeAgent``.

	        Args:
	            openai_api_key: API key forwarded to ``OpenAIServerModel``.

	        Raises:
	            OSError: If ``prompt_templates.yaml`` or ``system_prompt.txt``
	                cannot be opened.
	        """
	        final_answer = FinalAnswerTool()
	        search_tool = CustomDuckDuckGoSearchTool(max_results=3)
	        visit_webpage_tool = CustomVisitWebpageTool()
	        model = OpenAIServerModel(
	            model_id="gpt-4.1-2025-04-14",
	            # model_id="gpt-4.1-mini-2025-04-14",
	            # model_id="gpt-4.1-nano-2025-04-14",
	            max_completion_tokens=1024,
	            temperature=0.01,
	            api_key=openai_api_key,
	        )
	        # Read the prompt files as UTF-8 explicitly so the result does not
	        # depend on the platform's locale default encoding.
	        with open('prompt_templates.yaml', 'r', encoding='utf-8') as f:
	            prompt_templates = yaml.safe_load(f)
	        with open('system_prompt.txt', 'r', encoding='utf-8') as f:
	            prompt_templates['system_prompt'] = f.read()
	        self.agent = CodeAgent(
	            model=model,
	            prompt_templates=prompt_templates,
	            tools=[search_tool, visit_webpage_tool, final_answer],
	            max_steps=10,
	            verbosity_level=100,
	            grammar=None,
	            planning_interval=None,
	            name='Advanced GAIA Agent',
	            description=None,
	            max_print_outputs_length=python_interpreter_max_print_outputs_length,
	        )
	        self.agent.visualize()

	    def run(self, task: dict[str, str]) -> str:
	        """Answer one task, or return ``''`` when the task is skipped.

	        Args:
	            task: Mapping read for the keys ``'question'`` and ``'file_name'``.

	        Returns:
	            The result of ``self.agent.run(question)``, or an empty string
	            when the task names an attached file, has no question, or its
	            question mentions ``www.youtube.com``.
	        """
	        # A missing or null 'file_name' means "no attachment"; the previous
	        # len() call raised TypeError in that case.
	        if task.get('file_name'):
	            return ''  # skip questions where file processing is needed

	        question = task.get('question')
	        if not question:
	            return ''  # nothing to hand to the agent

	        if 'www.youtube.com' in question:
	            return ''  # skip questions where video processing is needed

	        return self.agent.run(question)


	if __name__ == '__main__':
	    # Take the API key from the environment, falling back to a local key file.
	    api_key = os.getenv('OPENAI_API_KEY')
	    if not api_key:
	        with open("data/openai.key", "r") as key_file:
	            api_key = key_file.read().strip()

	    agent = SmolAgent(openai_api_key=api_key)

	    with open('data/questions.json', 'r') as questions_file:
	        questions = json.load(questions_file)

	    # Run every loaded question through the agent, echoing the task before
	    # the run and the answer after it.
	    for task in questions:
	        print('\n===', task, '\n---', sep='\n')
	        answer = agent.run(task)
	        print('\n---', answer, sep='\n')