Spaces:

osunlp
/

QUEST

Running

App Files Community

TomLii committed on Apr 17

Commit

dd903d9

1 Parent(s): f656413

Add model fallback plus lightweight memory and cache

Browse files

Files changed (1) hide show

app.py +98 -20

app.py CHANGED Viewed

@@ -2,7 +2,7 @@ import json
 import os
 import re
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional, Tuple
 import gradio as gr
 import requests
@@ -11,12 +11,16 @@ from duckduckgo_search import DDGS
 from huggingface_hub import InferenceClient
-DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "Qwen/Qwen2.5-7B-Instruct")
 DEFAULT_FREE_MODELS = [
     "Qwen/Qwen2.5-7B-Instruct",
     "meta-llama/Llama-3.1-8B-Instruct",
-    "mistralai/Mistral-7B-Instruct-v0.3",
 ]
 SYSTEM_PROMPT = """You are a Deep Research assistant.
 You can think step by step, use tools, and then return a final answer.
@@ -47,6 +51,9 @@ TOOL_RESPONSE_TEMPLATE = """<tool_response>
 {payload}
 </tool_response>"""
 CUSTOM_CSS = """
 @import url('https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;600;700&display=swap');
@@ -100,6 +107,9 @@ CUSTOM_CSS = """
 class AgentState:
     searched_queries: List[str] = field(default_factory=list)
     visited_urls: List[str] = field(default_factory=list)
     trace: List[Dict[str, Any]] = field(default_factory=list)
@@ -128,6 +138,10 @@ def parse_tool_call(text: str) -> Tuple[Optional[str], Optional[Dict[str, Any]],
 def run_search(query: str, max_results: int = 5) -> Dict[str, Any]:
     if not query.strip():
         return {"ok": False, "error": "Search query cannot be empty."}
     rows: List[Dict[str, str]] = []
     with DDGS() as ddgs:
         for item in ddgs.text(query, max_results=max_results):
@@ -138,7 +152,9 @@ def run_search(query: str, max_results: int = 5) -> Dict[str, Any]:
                     "body": item.get("body", ""),
                 }
             )
-    return {"ok": True, "query": query, "results": rows}
 def _clean_html_to_text(html: str, max_chars: int) -> str:
@@ -153,6 +169,9 @@ def _clean_html_to_text(html: str, max_chars: int) -> str:
 def run_visit(url: str, max_chars: int = 6000) -> Dict[str, Any]:
     if not url.strip():
         return {"ok": False, "error": "URL cannot be empty."}
     try:
         resp = requests.get(
             url,
@@ -165,7 +184,9 @@ def run_visit(url: str, max_chars: int = 6000) -> Dict[str, Any]:
             text = _clean_html_to_text(resp.text, max_chars=max_chars)
         else:
             text = resp.text[:max_chars]
-        return {"ok": True, "url": url, "content": text}
     except Exception as exc:
         return {"ok": False, "url": url, "error": str(exc)}
@@ -173,17 +194,30 @@ def run_visit(url: str, max_chars: int = 6000) -> Dict[str, Any]:
 def call_model(
     client: InferenceClient,
     messages: List[Dict[str, str]],
-    model: str,
     temperature: float,
     max_new_tokens: int,
-) -> str:
-    completion = client.chat_completion(
-        model=model,
-        messages=messages,
-        temperature=temperature,
-        max_tokens=max_new_tokens,
-    )
-    return completion.choices[0].message.content or ""
 def build_research_agent(
@@ -196,6 +230,8 @@ def build_research_agent(
     token = os.getenv("HF_TOKEN")
     client = InferenceClient(token=token)
     state = AgentState()
     messages: List[Dict[str, str]] = [
         {"role": "system", "content": SYSTEM_PROMPT},
@@ -205,10 +241,20 @@ def build_research_agent(
     final_answer: Optional[str] = None
     for turn in range(1, max_turns + 1):
-        model_output = call_model(
             client=client,
             messages=messages,
-            model=model,
             temperature=temperature,
             max_new_tokens=1400,
         )
@@ -237,16 +283,45 @@ def build_research_agent(
                 query = str(tool_args.get("query", "")).strip()
                 max_results = int(tool_args.get("max_results", max_search_results))
                 max_results = max(1, min(max_results, 10))
-                if query:
                     state.searched_queries.append(query)
-                tool_response = run_search(query=query, max_results=max_results)
             elif tool_name == "visit":
                 url = str(tool_args.get("url", "")).strip()
                 max_chars = int(tool_args.get("max_chars", 6000))
                 max_chars = max(500, min(max_chars, 20000))
-                if url:
                     state.visited_urls.append(url)
-                tool_response = run_visit(url=url, max_chars=max_chars)
             else:
                 tool_response = {"ok": False, "error": f"Unknown tool: {tool_name}"}
@@ -267,13 +342,16 @@ def build_research_agent(
         )
     citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
     if citations:
         final_answer = f"{final_answer}\n\n### Visited Sources\n{citations}"
     trace_text = json.dumps(
         {
             "searched_queries": state.searched_queries,
             "visited_urls": state.visited_urls,
             "trace": state.trace,
         },
         ensure_ascii=False,

 import os
 import re
 from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Set, Tuple
 import gradio as gr
 import requests
 from huggingface_hub import InferenceClient
 DEFAULT_FREE_MODELS = [
+    # Newer free-friendly candidates (availability depends on HF Inference quota/region)
+    "Qwen/Qwen3-8B",
+    "google/gemma-3-12b-it",
+    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
+    # Fallback older but usually reliable
     "Qwen/Qwen2.5-7B-Instruct",
     "meta-llama/Llama-3.1-8B-Instruct",
 ]
+DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", DEFAULT_FREE_MODELS[0])
 SYSTEM_PROMPT = """You are a Deep Research assistant.
 You can think step by step, use tools, and then return a final answer.
 {payload}
 </tool_response>"""
+SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}
+VISIT_CACHE: Dict[str, Dict[str, Any]] = {}
 CUSTOM_CSS = """
 @import url('https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;600;700&display=swap');
 class AgentState:
     searched_queries: List[str] = field(default_factory=list)
     visited_urls: List[str] = field(default_factory=list)
+    searched_query_set: Set[str] = field(default_factory=set)
+    visited_url_set: Set[str] = field(default_factory=set)
+    trusted_notes: List[str] = field(default_factory=list)
     trace: List[Dict[str, Any]] = field(default_factory=list)
 def run_search(query: str, max_results: int = 5) -> Dict[str, Any]:
     if not query.strip():
         return {"ok": False, "error": "Search query cannot be empty."}
+    cache_key = f"{query.strip().lower()}::{max_results}"
+    if cache_key in SEARCH_CACHE:
+        return {**SEARCH_CACHE[cache_key], "cached": True}
     rows: List[Dict[str, str]] = []
     with DDGS() as ddgs:
         for item in ddgs.text(query, max_results=max_results):
                     "body": item.get("body", ""),
                 }
             )
+    payload = {"ok": True, "query": query, "results": rows, "cached": False}
+    SEARCH_CACHE[cache_key] = payload
+    return payload
 def _clean_html_to_text(html: str, max_chars: int) -> str:
 def run_visit(url: str, max_chars: int = 6000) -> Dict[str, Any]:
     if not url.strip():
         return {"ok": False, "error": "URL cannot be empty."}
+    cache_key = f"{url.strip()}::{max_chars}"
+    if cache_key in VISIT_CACHE:
+        return {**VISIT_CACHE[cache_key], "cached": True}
     try:
         resp = requests.get(
             url,
             text = _clean_html_to_text(resp.text, max_chars=max_chars)
         else:
             text = resp.text[:max_chars]
+        payload = {"ok": True, "url": url, "content": text, "cached": False}
+        VISIT_CACHE[cache_key] = payload
+        return payload
     except Exception as exc:
         return {"ok": False, "url": url, "error": str(exc)}
 def call_model(
     client: InferenceClient,
     messages: List[Dict[str, str]],
     preferred_model: str,
     candidate_models: List[str],
     temperature: float,
     max_new_tokens: int,
 ) -> Tuple[str, str]:
     """Request a chat completion, falling back through models until one works.

     The preferred model is tried first, followed by ``candidate_models`` in
     the order given; empty names and duplicates are skipped.

     Args:
         client: Object exposing ``chat_completion(model=, messages=,
             temperature=, max_tokens=)``.
         messages: Chat history, passed through to the client unchanged.
         preferred_model: Model name to try first.
         candidate_models: Fallback model names, tried in order.
         temperature: Sampling temperature forwarded to the client.
         max_new_tokens: Forwarded to the client as ``max_tokens``.

     Returns:
         ``(content, model_name)``: the first choice's message content (``""``
         when it is empty or ``None``) and the name of the model that
         produced it.

     Raises:
         RuntimeError: If no usable model name was supplied, or if every
             candidate failed. In the latter case the last underlying
             exception is attached as ``__cause__``.
     """
     # dict.fromkeys de-duplicates while keeping first-seen order.
     model_order = list(
         dict.fromkeys(m for m in [preferred_model, *(candidate_models or [])] if m)
     )
     if not model_order:
         # Avoid the misleading "Last error: None" when nothing was attempted.
         raise RuntimeError("No model candidates were provided.")
     last_error = None
     for model_name in model_order:
         try:
             completion = client.chat_completion(
                 model=model_name,
                 messages=messages,
                 temperature=temperature,
                 max_tokens=max_new_tokens,
             )
             return completion.choices[0].message.content or "", model_name
         except Exception as exc:
             # Deliberately broad: any failure (quota, availability, malformed
             # response) means "try the next candidate".
             last_error = exc
     # Chain the cause so the original traceback is not lost.
     raise RuntimeError(
         f"All model candidates failed. Last error: {last_error}"
     ) from last_error
 def build_research_agent(
     token = os.getenv("HF_TOKEN")
     client = InferenceClient(token=token)
     state = AgentState()
+    used_model = model
+    recent_model_candidates = [m for m in DEFAULT_FREE_MODELS if m != model]
     messages: List[Dict[str, str]] = [
         {"role": "system", "content": SYSTEM_PROMPT},
     final_answer: Optional[str] = None
     for turn in range(1, max_turns + 1):
+        if state.trusted_notes and turn > 1 and turn % 3 == 0:
+            summary_lines = "\n".join(f"- {n}" for n in state.trusted_notes[-6:])
+            messages.append(
+                {
+                    "role": "user",
+                    "content": f"RESEARCH STATE SUMMARY\n{summary_lines}\nUse this summary to avoid repeating work.",
+                }
+            )
+        model_output, used_model = call_model(
             client=client,
             messages=messages,
+            preferred_model=model,
+            candidate_models=recent_model_candidates,
             temperature=temperature,
             max_new_tokens=1400,
         )
                 query = str(tool_args.get("query", "")).strip()
                 max_results = int(tool_args.get("max_results", max_search_results))
                 max_results = max(1, min(max_results, 10))
+                if query in state.searched_query_set:
+                    tool_response = {
+                        "ok": True,
+                        "query": query,
+                        "cached": True,
+                        "note": "This query was already searched. Reusing cached result to avoid duplicate work.",
+                        "results": [],
+                    }
+                else:
                     state.searched_queries.append(query)
+                    state.searched_query_set.add(query)
+                    tool_response = run_search(query=query, max_results=max_results)
+                    if tool_response.get("ok"):
+                        first_titles = [r.get("title", "") for r in tool_response.get("results", [])[:2]]
+                        if first_titles:
+                            state.trusted_notes.append(
+                                f"Searched '{query}' and found leads: {', '.join(t for t in first_titles if t)}"
+                            )
             elif tool_name == "visit":
                 url = str(tool_args.get("url", "")).strip()
                 max_chars = int(tool_args.get("max_chars", 6000))
                 max_chars = max(500, min(max_chars, 20000))
+                if url in state.visited_url_set:
+                    tool_response = {
+                        "ok": True,
+                        "url": url,
+                        "cached": True,
+                        "note": "This URL was already visited. Reusing cached result to avoid duplicate work.",
+                    }
+                else:
                     state.visited_urls.append(url)
+                    state.visited_url_set.add(url)
+                    tool_response = run_visit(url=url, max_chars=max_chars)
+                    if tool_response.get("ok"):
+                        snippet = str(tool_response.get("content", ""))[:180]
+                        if snippet:
+                            state.trusted_notes.append(
+                                f"Visited {url} and extracted key context: {snippet}"
+                            )
             else:
                 tool_response = {"ok": False, "error": f"Unknown tool: {tool_name}"}
         )
     citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
+    final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"
     if citations:
         final_answer = f"{final_answer}\n\n### Visited Sources\n{citations}"
     trace_text = json.dumps(
         {
+            "used_model": used_model,
             "searched_queries": state.searched_queries,
             "visited_urls": state.visited_urls,
+            "trusted_notes": state.trusted_notes[-10:],
             "trace": state.trace,
         },
         ensure_ascii=False,