Final_Assignment_Template

Sleeping

App Files Files Community

ABVM commited on Jun 7, 2025

Commit

262515c

verified ·

1 Parent(s): 6b0e1b2

Upload 2 files

Browse files

Files changed (2) hide show

multi_agent.py +154 -0
throttle.py +33 -0

multi_agent.py ADDED Viewed

	@@ -0,0 +1,154 @@

+from smolagents import (
+    CodeAgent,
+    VisitWebpageTool,
+    WebSearchTool,
+    WikipediaSearchTool,
+    PythonInterpreterTool,
+    FinalAnswerTool,
+    LiteLLMModel,
+)
+from vision_tool import image_reasoning_tool
+from throttle import consume
+import os
+import time
+# ---- TOOLS ----
+common = dict(
+    api_key=os.getenv("GROQ_API_KEY"),
+    api_base="https://api.groq.com/openai/v1",
+    flatten_messages_as_text=True,
+)
+# ---- MULTI-AGENT SYSTEM ----
+class MultyAgentSystem:
+    def __init__(self):
+        self.deepseek_model = LiteLLMModel(
+            "groq/deepseek-r1-distill-llama-70b",
+            max_tokens=512,
+            **common,
+        )
+        self.qwen_model = LiteLLMModel("groq/qwen-qwq-32b", **common)
+        self.fallback_model = LiteLLMModel("groq/llama3-70b-8k", **common)
+        self.verification_limit = int(os.getenv("VERIFY_WORD_LIMIT", "75"))
+        # --- Web agent definition ---
+        self.web_agent = CodeAgent(
+            model=self.qwen_model,
+            tools=[WebSearchTool(), VisitWebpageTool(), WikipediaSearchTool()],
+            name="web_agent",
+            description=(
+                "You are a web browsing agent. Whenever the given {task} involves browsing "
+                "the web or a specific website such as Wikipedia or YouTube, you will use "
+                "the provided tools. For web-based factual and retrieval tasks, be as precise and source-reliable as possible."
+            ),
+            additional_authorized_imports=[
+                "markdownify",
+                "json",
+                "requests",
+                "urllib.request",
+                "urllib.parse",
+                "wikipedia-api",
+            ],
+            verbosity_level=0,
+            max_steps=10,
+        )
+        # --- Info agent definition ---
+        self.info_agent = CodeAgent(
+            model=self.qwen_model,
+            tools=[PythonInterpreterTool(), image_reasoning_tool],
+            name="info_agent",
+            description=(
+                "You are an agent tasked with cleaning, parsing, calculating information, and performing OCR if images are provided in the {task}. "
+                "You can also analyze images using a vision model. You handle all math, code, and data manipulation. Use numpy, math, and available libraries. "
+                "For image or chess tasks, use pytesseract, PIL, chess, or the image_reasoning_tool as required."
+            ),
+            additional_authorized_imports=[
+                "numpy",
+                "math",
+                "pytesseract",
+                "PIL",
+                "chess",
+            ],
+        )
+        # --- Manager agent definition ---
+        manager_planning_interval = int(os.getenv("MANAGER_PLANNING_INTERVAL", "3"))
+        manager_max_steps = int(os.getenv("MANAGER_MAX_STEPS", "8"))
+        # The manager starts with the smaller Qwen model to minimize token usage
+        # and only relies on DeepSeek when verifying critical answers.
+        self.manager_agent = CodeAgent(
+            model=self.qwen_model,
+            tools=[FinalAnswerTool()],
+            managed_agents=[self.web_agent, self.info_agent],
+            name="manager_agent",
+            description=(
+                "You are the manager. Given a {task}, plan which agent to use: "
+                "If web data is needed, delegate to web_agent. If math, parsing, image reasoning, or code is needed, use info_agent. "
+                "After collecting outputs, optionally cross-validate and check correctness, then finalize and submit the best answer using FinalAnswerTool. "
+                "For each task, explicitly explain your planning steps and reasons for choosing which agent, and always prefer the most accurate and complete answer possible."
+            ),
+            additional_authorized_imports=[
+                "json",
+                "pandas",
+                "numpy",
+            ],
+            planning_interval=manager_planning_interval,
+            verbosity_level=2,
+            max_steps=manager_max_steps,
+        )
+        # runtime tracking for fallback switching
+        self.total_runtime = 0.0
+        self.first_call_duration = None
+        self.model_switched = False
+    def _switch_to_fallback(self):
+        if self.model_switched:
+            return
+        self.manager_agent.model = self.fallback_model
+        self.model_switched = True
+    def run(self, question, high_stakes: bool = False, **kwargs):
+        start_time = time.time()
+        print("Generating initial answer with Qwen-32B")
+        initial_answer = self.manager_agent(question, **kwargs)
+        call_duration = time.time() - start_time
+        answer = initial_answer
+        if high_stakes or len(initial_answer.split()) > self.verification_limit:
+            print("Verifying answer using DeepSeek-70B")
+            verification_prompt = (
+                "Review the following answer for accuracy and rewrite if needed:"
+                f"\n\n{initial_answer}"
+            )
+            try:
+                max_completion_tokens = kwargs.get("max_completion_tokens", 512)
+                prompt_tokens = len(verification_prompt.split())
+                consume(prompt_tokens + max_completion_tokens)
+                answer = self.deepseek_model(
+                    verification_prompt, max_completion_tokens=max_completion_tokens
+                )
+            except Exception as e:
+                print(f"Verification failed: {e}. Using initial answer.")
+                answer = initial_answer
+        if self.first_call_duration is None:
+            self.first_call_duration = call_duration
+            if self.first_call_duration > 30:
+                self._switch_to_fallback()
+        self.total_runtime += call_duration
+        if self.total_runtime > 300 and not self.model_switched:
+            self._switch_to_fallback()
+        return answer
+    def __call__(self, question, high_stakes: bool = False, **kwargs):
+        return self.run(question, high_stakes=high_stakes, **kwargs)

throttle.py ADDED Viewed

	@@ -0,0 +1,33 @@

+import threading
+import time
+TOKENS_PER_MINUTE = 5500
+_tokens_per_second = TOKENS_PER_MINUTE / 60.0
+_capacity = TOKENS_PER_MINUTE
+_lock = threading.Lock()
+_tokens = _capacity
+_last_timestamp = time.monotonic()
+def consume(n: int) -> None:
+    """Consume *n* tokens, waiting if necessary.
+    This function implements a simple thread-safe token bucket to keep
+    requests under the configured tokens-per-minute rate.
+    """
+    global _tokens, _last_timestamp
+    if n <= 0:
+        return
+    while True:
+        with _lock:
+            now = time.monotonic()
+            elapsed = now - _last_timestamp
+            _tokens = min(_capacity, _tokens + elapsed * _tokens_per_second)
+            _last_timestamp = now
+            if n <= _tokens:
+                _tokens -= n
+                return
+            needed = n - _tokens
+            wait_time = needed / _tokens_per_second
+            _tokens = 0
+        time.sleep(wait_time)