Spaces:
Sleeping
Sleeping
| from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel,load_tool,tool | |
| import datetime | |
| import requests | |
| import pytz | |
| import yaml | |
| from tools.final_answer import FinalAnswerTool | |
| from bs4 import BeautifulSoup | |
| import time | |
| import re | |
| from urllib.parse import quote | |
| from Gradio_UI import GradioUI | |
def search_internet_archive(query):
    """
    Query the Internet Archive advanced-search endpoint for an exact phrase.

    The phrase is wrapped in double quotes, URL-encoded and sent to
    ``advancedsearch.php``, asking for the ``identifier`` field as JSON.
    Identifiers that begin with 14 digits are split into that digit run
    (used as the timestamp) and the remainder (used as the URL part of a
    ``web.archive.org/web/`` link); all other identifiers are skipped.

    Returns a list of (timestamp, snapshot_url) tuples in ascending
    timestamp order. Any exception raised while fetching or parsing the
    response is printed and an empty list is returned instead.
    """
    quoted_phrase = quote(f'"{query}"')
    request_url = f"https://archive.org/advancedsearch.php?q={quoted_phrase}&fl[]=identifier&sort[]=publicdate&output=json"
    try:
        reply = requests.get(request_url, timeout=10)
        reply.raise_for_status()
        docs = reply.json().get('response', {}).get('docs', [])
        # Only identifiers shaped like <14 digits><rest> can be turned into links.
        matches = (re.match(r'(\d{14})(.+)', doc.get('identifier', '')) for doc in docs)
        found = [
            (hit.group(1), f"https://web.archive.org/web/{hit.group(1)}/{hit.group(2)}")
            for hit in matches
            if hit
        ]
        # Timestamps are fixed-width digit strings, so string order is date order.
        found.sort(key=lambda pair: pair[0])
        return found
    except Exception as e:
        print(f"Error searching Internet Archive: {e}")
        return []
def check_snapshot_for_phrase(snapshot_url, phrase):
    """
    Download a snapshot and report whether it contains the phrase.

    The comparison lower-cases both the phrase and the response text, so
    it ignores case. Returns True when the phrase occurs in the page,
    False when it does not or when fetching the page raises (the error is
    printed).
    """
    try:
        page = requests.get(snapshot_url, timeout=10)
        page.raise_for_status()
        needle = phrase.lower()
        haystack = page.text.lower()
        return needle in haystack
    except Exception as e:
        print(f"Error retrieving snapshot {snapshot_url}: {e}")
        return False
def find_first_mention(phrase):
    """
    Locate the earliest archived snapshot whose content contains the phrase.

    Candidates come from search_internet_archive() and are checked in the
    order it returns them using check_snapshot_for_phrase(). Returns a
    (timestamp, snapshot_url) pair for the first candidate that matches,
    or (None, None) when there are no candidates or none of them match.
    """
    print(f"Searching for '{phrase}'...")
    candidates = search_internet_archive(phrase)
    # An empty result simply skips the loop and falls through to (None, None).
    for captured_at, url in candidates or ():
        print(f"Checking snapshot from {captured_at}...")
        if check_snapshot_for_phrase(url, phrase):
            return captured_at, url
        time.sleep(1)  # Pause between requests to be polite to the server
    return None, None
def first_mention(phrase:str)-> str:
    # Keep the docstring layout below (summary line, then Args with one entry per parameter).
    """A tool that finds the first archived mention of a phrase on the internet.
    Args:
        phrase: The phrase whose first archived mention should be found.
    """
    found_at, found_url = find_first_mention(phrase)
    if not (found_at and found_url):
        return "No mentions found or an error occurred."
    return f"First mention found at {found_at}: {found_url}"
def get_current_time_in_timezone(timezone: str) -> str:
    """A tool that reports the current local time in a given timezone.
    Args:
        timezone: Name of a valid timezone (e.g., 'America/New_York').
    """
    try:
        zone = pytz.timezone(timezone)
        now_there = datetime.datetime.now(zone)
        stamp = now_there.strftime("%Y-%m-%d %H:%M:%S")
        return f"The current local time in {timezone} is: {stamp}"
    except Exception as err:
        # Any failure (e.g. an unknown timezone name) is reported as text rather than raised.
        return f"Error fetching time for timezone '{timezone}': {err}"
final_answer = FinalAnswerTool()

# If the agent does not answer, the model may be overloaded; switch to another model or to the
# following Hugging Face Endpoint, which also serves Qwen2.5 Coder:
# model_id='https://pflgm2locj2t89co.us-east-1.aws.endpoints.huggingface.cloud'
model = HfApiModel(
    max_tokens=2096,
    temperature=0.5,
    model_id='Qwen/Qwen2.5-Coder-32B-Instruct',  # this model may be overloaded at times
    custom_role_conversions=None,
)

# Import tool from Hub
# NOTE(review): loaded here but not included in the agent's tool list below.
image_generation_tool = load_tool("agents-course/text-to-image", trust_remote_code=True)

# Read the prompt templates with an explicit encoding so the result does not
# depend on the platform's default.
with open("prompts.yaml", 'r', encoding="utf-8") as stream:
    prompt_templates = yaml.safe_load(stream)

# first_mention and get_current_time_in_timezone are defined as plain
# functions in this file, while the agent's `tools` list expects Tool
# instances. Wrap them with smolagents' `tool` helper, which builds the tool
# description from each function's type hints and docstring.
agent = CodeAgent(
    model=model,
    tools=[  # add your tools here (don't remove final answer)
        final_answer,
        tool(first_mention),
        tool(get_current_time_in_timezone),
    ],
    max_steps=6,
    verbosity_level=1,
    grammar=None,
    planning_interval=None,
    name=None,
    description=None,
    prompt_templates=prompt_templates,
)

GradioUI(agent).launch()