"""Minimal question-answering agent backed by a hosted Zephyr 7B endpoint."""

import os

import requests

API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-alpha"
# NOTE: os.getenv returns None when HF_TOKEN is unset, in which case the header
# value becomes the literal string "Bearer None".
HEADERS = {"Authorization": f"Bearer {os.getenv('HF_TOKEN')}"}

system_prompt = (
    "You are an expert AI assistant taking a benchmark test for reasoning. "
    "Answer only the main question directly. Do not explain. Do not show work. "
    "If you see images or documents mentioned, assume you have access and extract answer. "
    "Use search or tools if needed. Answer format must be plain with no prefix or suffix."
)

# Seconds to wait for the HTTP request before giving up.
_REQUEST_TIMEOUT_SECONDS = 40
# The prompt ends with this marker; whatever follows its last occurrence in the
# generated text is treated as the answer.
_ANSWER_MARKER = "Answer:"
_UNEXPECTED_FORMAT = "ERROR: Unexpected response format"


def _extract_answer(result: object) -> str:
    """Pull the answer text out of a decoded JSON response.

    Accepts either a dict carrying a ``"generated_text"`` string or a
    non-empty list whose first element is such a dict.

    Args:
        result: The decoded JSON body of the endpoint's response.

    Returns:
        The text after the last ``"Answer:"`` marker, stripped of surrounding
        whitespace, or ``"ERROR: Unexpected response format"`` when ``result``
        has any other shape.
    """
    if isinstance(result, list):
        # Only the first candidate is used; an empty list has nothing to offer.
        payload = result[0] if result else None
    else:
        payload = result

    if not isinstance(payload, dict):
        return _UNEXPECTED_FORMAT

    text = payload.get("generated_text")
    if not isinstance(text, str):
        return _UNEXPECTED_FORMAT

    # Splitting is a no-op when the marker is absent, so it is safe to apply
    # to both response shapes.
    return text.split(_ANSWER_MARKER)[-1].strip()


class BasicAgent:
    """Callable agent that forwards a question to ``API_URL`` and returns the answer."""

    def __init__(self):
        print("✅ Agent initialized with Zephyr 7B.")

    def __call__(self, question: str) -> str:
        """Answer ``question`` with a single POST to the inference endpoint.

        Args:
            question: The question text to embed in the prompt.

        Returns:
            The extracted answer. This method never raises: a failure of any
            kind is reported as a string starting with ``"AGENT ERROR: "``,
            and an unrecognised response shape as
            ``"ERROR: Unexpected response format"``.
        """
        prompt = f"{system_prompt}\n\nQuestion: {question}\nAnswer:"
        try:
            response = requests.post(
                API_URL,
                headers=HEADERS,
                json={"inputs": prompt},
                timeout=_REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            return _extract_answer(response.json())
        except Exception as e:
            # Deliberate catch-all: callers receive an error string, not an exception.
            return f"AGENT ERROR: {e}"