QUEST / app.py
TomLii's picture
Polish icons, examples, and blue control styling
4b3dbbd
raw
history blame
22.5 kB
import json
import os
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Set, Tuple
import gradio as gr
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from huggingface_hub import InferenceClient
# Model ids offered in the UI dropdown and tried as fallbacks, in list order.
DEFAULT_FREE_MODELS = [
    # Newer free-friendly candidates (availability depends on HF Inference quota/region)
    "Qwen/Qwen3-8B",
    "google/gemma-3-12b-it",
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B",
    # Fallback older but usually reliable
    "Qwen/Qwen2.5-7B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
]

# Preferred model id; the DEFAULT_MODEL environment variable overrides the first list entry.
DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", DEFAULT_FREE_MODELS[0])

# Link targets read from the environment; "#" is the placeholder when a variable is unset.
PAPER_URL = os.environ.get("PAPER_URL", "#")
CODE_URL = os.environ.get("CODE_URL", "#")
DATASET_URL = os.environ.get("DATASET_URL", "#")
MODEL_URL = os.environ.get("MODEL_URL", "#")
# System message placed first in every conversation. It defines the text
# protocol the agent loop relies on: a single <tool_call> block holding a JSON
# object with "name" ("search" or "visit") and "arguments", or a final
# <answer> block. The tag names must stay in sync with the regexes used to
# parse model output.
SYSTEM_PROMPT = """You are a Deep Research assistant.
You can think step by step, use tools, and then return a final answer.
Tool protocol:
- To call a tool, output exactly one block:
<tool_call>
{"name":"search","arguments":{"query":"...","max_results":5}}
</tool_call>
or
<tool_call>
{"name":"visit","arguments":{"url":"...","max_chars":6000}}
</tool_call>
- When you are done, output:
<answer>
...final answer...
</answer>
Rules:
- Use tools when needed, but avoid repeated calls to the same URL/query.
- Cite useful URLs in your final answer.
- If a tool fails, recover and continue.
"""
# Wrapper for tool results sent back to the model; {payload} is filled via
# str.format with the JSON-serialized tool response.
TOOL_RESPONSE_TEMPLATE = """<tool_response>
{payload}
</tool_response>"""
# Module-level memo tables for tool results. Keys combine the normalized input
# with the size limit ("<input>::<limit>"); nothing in this file evicts entries.
SEARCH_CACHE: Dict[str, Dict[str, Any]] = {}
VISIT_CACHE: Dict[str, Dict[str, Any]] = {}

# Absolute path (as a string) to assets/quest-logo.png next to this source file.
LOGO_PATH = str(Path(__file__).resolve().parent.joinpath("assets", "quest-logo.png"))
# Stylesheet handed to gr.Blocks(css=...). It pulls the Manrope web font,
# applies a light-blue palette to the page, buttons, inputs, sliders and the
# examples block, and defines the helper classes referenced through
# elem_classes / inline HTML in the UI (section-card, section-title,
# layout-gap, right-stack, icon-grid, icon-link, example-buttons, no-frame,
# slider-root, banner-logo-image, ...).
CUSTOM_CSS = """
@import url('https://fonts.googleapis.com/css2?family=Manrope:wght@400;500;600;700&display=swap');
.gradio-container {
max-width: 1200px !important;
font-family: 'Manrope', 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
background: #f3f8ff !important;
color: #0b2a66;
}
.gradio-container * {
font-family: inherit !important;
}
.banner-card {
padding: 18px 20px;
border-radius: 18px;
background: linear-gradient(100deg, #f8fbff 0%, #eef5ff 100%);
color: #0f172a;
border: 1px solid #dbe7ff;
box-shadow: 0 8px 24px rgba(15, 23, 42, 0.08);
margin-bottom: 14px;
}
.banner-inner {
display: flex;
align-items: center;
gap: 18px;
}
.banner-logo-wrap {
min-width: 250px;
}
.banner-logo-image img {
width: 100%;
max-width: 360px;
height: auto;
object-fit: contain;
border-radius: 0;
background: transparent;
border: none;
}
.banner-title {
font-size: 24px;
font-weight: 700;
line-height: 1.15;
color: #1e3a8a;
}
.banner-subtitle {
margin-top: 6px;
opacity: 0.9;
font-size: 14px;
color: #1f2937;
}
.section-card {
border: none !important;
border-radius: 0 !important;
padding: 0 !important;
background: transparent !important;
box-shadow: none !important;
}
.section-title {
font-size: 13px;
font-weight: 700;
color: #2454b8;
margin-bottom: 6px;
letter-spacing: 0.02em;
}
.layout-gap {
gap: 18px;
}
.right-stack > * {
margin-bottom: 8px;
}
.icon-grid {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 10px;
width: 100%;
max-width: 360px;
margin: 0 auto;
}
.icon-link {
display: flex;
align-items: center;
justify-content: center;
gap: 8px;
padding: 12px 14px;
border: none;
border-radius: 12px;
text-decoration: none !important;
color: #1d4ed8 !important;
background: #eaf2ff;
font-weight: 600;
font-size: 17px;
}
.icon-link:hover {
background: #dbeafe;
}
.gradio-container .gr-button-primary {
background: linear-gradient(135deg, #2563eb 0%, #3b82f6 100%) !important;
color: white !important;
border: none !important;
box-shadow: 0 6px 16px rgba(37, 99, 235, 0.25) !important;
}
.gradio-container .gr-button-secondary {
background: #ffffff !important;
color: #1d4ed8 !important;
border: 1px solid #bfdbfe !important;
box-shadow: inset 0 0 0 1px #bfdbfe !important;
}
.gradio-container .gr-button-stop {
background: #ffffff !important;
color: #1d4ed8 !important;
border: 1px solid #bfdbfe !important;
box-shadow: inset 0 0 0 1px #bfdbfe !important;
}
.gradio-container .gr-group,
.gradio-container .gr-box,
.gradio-container .gr-panel {
border: none !important;
box-shadow: none !important;
background: transparent !important;
}
.gradio-container .tabs {
border: none !important;
background: transparent !important;
}
.gradio-container .tabitem {
border: none !important;
background: transparent !important;
}
.gradio-container .gr-form,
.gradio-container .form {
background: #ffffff !important;
}
.gradio-container textarea,
.gradio-container input {
background: #ffffff !important;
border: none !important;
box-shadow: inset 0 0 0 1px #dbeafe !important;
border-radius: 12px !important;
}
.gradio-container .gr-markdown a {
color: #1d4ed8 !important;
}
.inline-example-title {
font-size: 13px;
font-weight: 700;
color: #2454b8;
margin: 8px 0 2px 0;
}
.example-buttons {
display: grid;
gap: 10px;
margin-top: 8px;
}
.example-note {
color: #3b82f6;
font-size: 12px;
margin-top: 2px;
}
.no-frame {
background: transparent !important;
border: none !important;
box-shadow: none !important;
}
.slider-root .noUi-target,
.slider-root [role="slider"] {
--track-color: #dbeafe;
--range-color: #2563eb;
}
.slider-root .noUi-target {
background: #dbeafe !important;
}
.slider-root .noUi-connect {
background: #2563eb !important;
}
.gradio-container .noUi-target {
border: none !important;
box-shadow: none !important;
background: #eaf2ff !important;
}
.gradio-container .noUi-connect {
background: #2563eb !important;
}
.gradio-container .noUi-handle {
border: none !important;
box-shadow: 0 0 0 2px #2563eb !important;
background: #ffffff !important;
}
.gradio-container [data-testid="block-examples"] {
border: none !important;
background: #eef4ff !important;
border-radius: 12px !important;
padding: 8px !important;
}
.gradio-container [data-testid="block-examples"] table,
.gradio-container [data-testid="block-examples"] thead,
.gradio-container [data-testid="block-examples"] tbody,
.gradio-container [data-testid="block-examples"] tr,
.gradio-container [data-testid="block-examples"] td {
border: none !important;
background: transparent !important;
}
.gradio-container [data-testid="block-examples"] button {
background: #ffffff !important;
color: #1d4ed8 !important;
border: none !important;
border-radius: 10px !important;
box-shadow: inset 0 0 0 1px #bfdbfe !important;
font-size: 13px !important;
}
.gradio-container [data-testid="block-examples"] button:hover {
background: #dbeafe !important;
}
"""
@dataclass
class AgentState:
    """Per-run bookkeeping for the research loop.

    Every field starts empty and gets its own fresh container per instance.
    The ``*_set`` fields mirror the corresponding lists so membership checks
    do not have to scan the ordered history.
    """

    # Ordered history of tool inputs.
    searched_queries: List[str] = field(default_factory=list)
    visited_urls: List[str] = field(default_factory=list)

    # Same values as above, kept as sets for duplicate detection.
    searched_query_set: Set[str] = field(default_factory=set)
    visited_url_set: Set[str] = field(default_factory=set)

    # Short one-line notes derived from successful tool calls.
    trusted_notes: List[str] = field(default_factory=list)

    # Chronological record of assistant outputs and tool responses.
    trace: List[Dict[str, Any]] = field(default_factory=list)
def extract_answer(text: str) -> Optional[str]:
    """Pull the final answer out of a model reply.

    Looks for the first ``<answer>...</answer>`` pair (tag case is ignored and
    the body may span several lines) and returns its whitespace-trimmed body.
    Returns ``None`` when no complete pair is present.
    """
    found = re.search(
        r"<answer>\s*(.*?)\s*</answer>",
        text,
        flags=re.DOTALL | re.IGNORECASE,
    )
    if found is None:
        return None
    return found.group(1).strip()
def parse_tool_call(text: str) -> Tuple[Optional[str], Optional[Dict[str, Any]], Optional[str]]:
    """Extract the first ``<tool_call>`` block from a model reply.

    Args:
        text: Raw assistant output.

    Returns:
        A ``(name, arguments, error)`` triple:

        * ``(None, None, None)`` when the reply contains no tool-call block;
        * ``(None, None, message)`` when a block exists but is malformed;
        * ``(name, arguments, None)`` on success. ``arguments`` defaults to an
          empty dict when the key is absent.
    """
    match = re.search(r"<tool_call>\s*(.*?)\s*</tool_call>", text, flags=re.DOTALL | re.IGNORECASE)
    if not match:
        return None, None, None

    payload = match.group(1).strip()
    try:
        data = json.loads(payload)
    except json.JSONDecodeError:
        return None, None, "Invalid JSON in <tool_call> block."

    format_error = "Invalid tool format. Expect name(str) and arguments(dict)."
    # Valid JSON is not necessarily an object: a list, string or number has no
    # .get() and would otherwise raise AttributeError and abort the whole run.
    if not isinstance(data, dict):
        return None, None, format_error

    name = data.get("name")
    arguments = data.get("arguments", {})
    if not isinstance(name, str) or not isinstance(arguments, dict):
        return None, None, format_error
    return name, arguments, None
def run_search(query: str, max_results: int = 5) -> Dict[str, Any]:
    """Run a DuckDuckGo text search and return a JSON-serializable result.

    Successful results are memoized in ``SEARCH_CACHE`` under
    ``"<lower-cased query>::<max_results>"``; a cache hit is returned with
    ``"cached": True``.

    Args:
        query: Search string; must contain non-whitespace characters.
        max_results: Maximum number of hits requested from the backend.

    Returns:
        ``{"ok": True, "query", "results": [{"title", "href", "body"}, ...],
        "cached"}`` on success, or ``{"ok": False, "error", ...}`` when the
        query is empty or the search backend raises.
    """
    if not query.strip():
        return {"ok": False, "error": "Search query cannot be empty."}

    cache_key = f"{query.strip().lower()}::{max_results}"
    if cache_key in SEARCH_CACHE:
        return {**SEARCH_CACHE[cache_key], "cached": True}

    # Tool boundary: report backend failures (rate limits, network errors) as
    # a tool error, the same way run_visit does, so the agent loop can tell
    # the model and continue instead of aborting the whole run.
    try:
        with DDGS() as ddgs:
            rows: List[Dict[str, str]] = [
                {
                    "title": item.get("title", ""),
                    "href": item.get("href", ""),
                    "body": item.get("body", ""),
                }
                for item in (ddgs.text(query, max_results=max_results) or [])
            ]
    except Exception as exc:
        # Failures are deliberately not cached so a later retry can succeed.
        return {"ok": False, "query": query, "error": str(exc)}

    payload = {"ok": True, "query": query, "results": rows, "cached": False}
    SEARCH_CACHE[cache_key] = payload
    return payload
def _clean_html_to_text(html: str, max_chars: int) -> str:
    """Reduce an HTML document to plain text, truncated to ``max_chars``.

    ``<script>``, ``<style>`` and ``<noscript>`` elements are removed before
    text extraction, and every run of whitespace is collapsed to one space.
    """
    document = BeautifulSoup(html, "html.parser")
    # Drop elements whose content is not human-readable page text.
    for node in document.find_all(["script", "style", "noscript"]):
        node.decompose()
    raw_text = document.get_text(separator=" ", strip=True)
    collapsed = re.sub(r"\s+", " ", raw_text)
    return collapsed[:max_chars]
def run_visit(url: str, max_chars: int = 6000) -> Dict[str, Any]:
    """Fetch a web page and return its text content, truncated to ``max_chars``.

    HTML responses are reduced to plain text via ``_clean_html_to_text``;
    anything else is returned as the decoded body. Successful results are
    memoized in ``VISIT_CACHE`` under ``"<url>::<max_chars>"``.

    Args:
        url: Absolute ``http``/``https`` URL. Surrounding whitespace is ignored.
        max_chars: Maximum number of characters of content to return.

    Returns:
        ``{"ok": True, "url", "content", "cached"}`` on success, otherwise
        ``{"ok": False, "error", ...}``. This function does not raise for
        request failures.
    """
    from urllib.parse import urlsplit  # local import: only needed here

    # Normalize once so the cache key, the request and the reported URL agree.
    url = url.strip()
    if not url:
        return {"ok": False, "error": "URL cannot be empty."}

    # The URL is chosen by the model, so only allow plain web schemes.
    # NOTE(review): hosts that resolve to loopback/private addresses are not
    # blocked here; add such a check if this runs next to internal services.
    try:
        scheme = urlsplit(url).scheme.lower()
    except ValueError as exc:
        return {"ok": False, "url": url, "error": f"Invalid URL: {exc}"}
    if scheme not in ("http", "https"):
        return {
            "ok": False,
            "url": url,
            "error": f"Unsupported URL scheme '{scheme}'. Only http and https URLs can be visited.",
        }

    cache_key = f"{url}::{max_chars}"
    if cache_key in VISIT_CACHE:
        return {**VISIT_CACHE[cache_key], "cached": True}

    try:
        resp = requests.get(
            url,
            timeout=20,
            headers={"User-Agent": "Mozilla/5.0 (compatible; DeepResearchSpace/1.0)"},
        )
        resp.raise_for_status()
        content_type = resp.headers.get("content-type", "")
        # Decode the body once; each access to resp.text decodes it again.
        body = resp.text
        if "text/html" in content_type or "<html" in body[:200].lower():
            text = _clean_html_to_text(body, max_chars=max_chars)
        else:
            text = body[:max_chars]
        payload = {"ok": True, "url": url, "content": text, "cached": False}
        VISIT_CACHE[cache_key] = payload
        return payload
    except Exception as exc:
        # Tool boundary: report any failure to the caller instead of raising.
        return {"ok": False, "url": url, "error": str(exc)}
def call_model(
    client: InferenceClient,
    messages: List[Dict[str, str]],
    preferred_model: str,
    candidate_models: List[str],
    temperature: float,
    max_new_tokens: int,
) -> Tuple[str, str]:
    """Request a chat completion, falling back through alternative models.

    Models are tried in order: ``preferred_model`` first, then each entry of
    ``candidate_models``. Empty ids and duplicates are skipped. The first
    model that answers wins.

    Args:
        client: Inference client used to issue the chat completion.
        messages: Conversation so far, as role/content dicts.
        preferred_model: Model id to try first.
        candidate_models: Fallback model ids, in priority order.
        temperature: Sampling temperature forwarded to the API.
        max_new_tokens: Forwarded to the API as ``max_tokens``.

    Returns:
        ``(content, model_name)``; ``content`` is ``""`` when the API returned
        no message content.

    Raises:
        RuntimeError: If every candidate failed. The last underlying exception
            is attached as ``__cause__``.
    """
    # dict.fromkeys keeps first-seen order while removing duplicates.
    model_order = [m for m in dict.fromkeys([preferred_model, *candidate_models]) if m]

    last_error: Optional[Exception] = None
    for model_name in model_order:
        try:
            completion = client.chat_completion(
                model=model_name,
                messages=messages,
                temperature=temperature,
                max_tokens=max_new_tokens,
            )
            return completion.choices[0].message.content or "", model_name
        except Exception as exc:
            # Any failure (quota, unavailable model, malformed response)
            # simply moves on to the next candidate.
            last_error = exc

    raise RuntimeError(f"All model candidates failed. Last error: {last_error}") from last_error
def _bounded_int(value: Any, default: int, low: int, high: int) -> int:
    """Coerce a model-supplied value to an int in ``[low, high]``, using ``default`` if it is not numeric."""
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError):
        number = default
    return max(low, min(number, high))


def _handle_search(state: AgentState, tool_args: Dict[str, Any], default_max_results: int) -> Dict[str, Any]:
    """Execute a ``search`` tool call, skipping queries that already succeeded in this run."""
    query = str(tool_args.get("query", "")).strip()
    max_results = _bounded_int(tool_args.get("max_results", default_max_results), default_max_results, 1, 10)

    if query in state.searched_query_set:
        return {
            "ok": True,
            "query": query,
            "cached": True,
            "note": "This query was already searched. Reusing cached result to avoid duplicate work.",
            "results": [],
        }

    tool_response = run_search(query=query, max_results=max_results)
    if not tool_response.get("ok"):
        # Failed searches are not recorded, so the model may retry them.
        return tool_response

    state.searched_queries.append(query)
    state.searched_query_set.add(query)
    first_titles = [r.get("title", "") for r in tool_response.get("results", [])[:2]]
    if first_titles:
        state.trusted_notes.append(
            f"Searched '{query}' and found leads: {', '.join(t for t in first_titles if t)}"
        )
    return tool_response


def _handle_visit(state: AgentState, tool_args: Dict[str, Any]) -> Dict[str, Any]:
    """Execute a ``visit`` tool call, skipping URLs that were already fetched successfully in this run."""
    url = str(tool_args.get("url", "")).strip()
    max_chars = _bounded_int(tool_args.get("max_chars", 6000), 6000, 500, 20000)

    if url in state.visited_url_set:
        return {
            "ok": True,
            "url": url,
            "cached": True,
            "note": "This URL was already visited. Reusing cached result to avoid duplicate work.",
        }

    tool_response = run_visit(url=url, max_chars=max_chars)
    if not tool_response.get("ok"):
        # Failed fetches must not show up under "Visited Sources" nor block a retry.
        return tool_response

    state.visited_urls.append(url)
    state.visited_url_set.add(url)
    snippet = str(tool_response.get("content", ""))[:180]
    if snippet:
        state.trusted_notes.append(f"Visited {url} and extracted key context: {snippet}")
    return tool_response


def build_research_agent(
    question: str,
    model: str,
    max_turns: int,
    max_search_results: int,
    temperature: float,
) -> Tuple[str, str]:
    """Run the tool-using research loop for one question.

    Each turn asks the model for a reply, then either stops on an
    ``<answer>`` block, executes the requested ``search``/``visit`` tool and
    feeds the JSON result back as a user message, or nudges the model to
    finalize when it produced neither. On every third turn a summary of the
    latest notes is injected to discourage repeated work.

    Args:
        question: The user's research question.
        model: Preferred model id; the other ``DEFAULT_FREE_MODELS`` entries
            serve as fallbacks.
        max_turns: Maximum number of model calls.
        max_search_results: Default result count for searches that do not
            specify ``max_results``.
        temperature: Sampling temperature.

    Returns:
        ``(final_answer_markdown, trace_json)``. The answer is prefixed with
        the model that produced the last reply and followed by the list of
        successfully visited URLs, if any.

    Raises:
        RuntimeError: Propagated from ``call_model`` when every model fails.
    """
    token = os.getenv("HF_TOKEN")
    client = InferenceClient(token=token)
    state = AgentState()
    used_model = model
    recent_model_candidates = [m for m in DEFAULT_FREE_MODELS if m != model]
    # UI sliders may deliver numbers as floats; normalize before use.
    max_turns = int(max_turns)
    default_max_results = _bounded_int(max_search_results, 5, 1, 10)

    messages: List[Dict[str, str]] = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": question},
    ]
    final_answer: Optional[str] = None

    for turn in range(1, max_turns + 1):
        if state.trusted_notes and turn > 1 and turn % 3 == 0:
            summary_lines = "\n".join(f"- {n}" for n in state.trusted_notes[-6:])
            messages.append(
                {
                    "role": "user",
                    "content": f"RESEARCH STATE SUMMARY\n{summary_lines}\nUse this summary to avoid repeating work.",
                }
            )

        model_output, used_model = call_model(
            client=client,
            messages=messages,
            preferred_model=model,
            candidate_models=recent_model_candidates,
            temperature=temperature,
            max_new_tokens=1400,
        )
        messages.append({"role": "assistant", "content": model_output})
        state.trace.append({"turn": turn, "assistant": model_output})

        extracted_answer = extract_answer(model_output)
        if extracted_answer:
            final_answer = extracted_answer
            break

        tool_name, tool_args, tool_err = parse_tool_call(model_output)
        if tool_err:
            tool_response = {"ok": False, "error": tool_err}
        elif not tool_name:
            # No explicit tool call and no final answer: force finalization.
            messages.append(
                {
                    "role": "user",
                    "content": "No tool call detected. Provide your best final answer in <answer>...</answer> now.",
                }
            )
            continue
        elif tool_name == "search":
            tool_response = _handle_search(state, tool_args, default_max_results)
        elif tool_name == "visit":
            tool_response = _handle_visit(state, tool_args)
        else:
            tool_response = {"ok": False, "error": f"Unknown tool: {tool_name}"}

        state.trace.append({"turn": turn, "tool": tool_name, "tool_response": tool_response})
        messages.append(
            {
                "role": "user",
                "content": TOOL_RESPONSE_TEMPLATE.format(
                    payload=json.dumps(tool_response, ensure_ascii=False)
                ),
            }
        )

    if final_answer is None:
        final_answer = (
            "I could not finish a complete research answer within the configured turns. "
            "Try increasing max turns or switching to a stronger model."
        )

    citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
    final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"
    if citations:
        final_answer = f"{final_answer}\n\n### Visited Sources\n{citations}"

    trace_text = json.dumps(
        {
            "used_model": used_model,
            "searched_queries": state.searched_queries,
            "visited_urls": state.visited_urls,
            "trusted_notes": state.trusted_notes[-10:],
            "trace": state.trace,
        },
        ensure_ascii=False,
        indent=2,
    )
    return final_answer, trace_text
def run_ui(
    question: str,
    model: str,
    max_turns: int,
    max_search_results: int,
    temperature: float,
):
    """Gradio click handler: validate inputs, run the agent, never raise.

    Returns a ``(answer_markdown, trace_json)`` pair in every case. A blank
    question or a missing ``HF_TOKEN`` yields an explanatory message, and any
    exception raised by the agent is rendered as ``"Error: ..."`` with a JSON
    error object as the trace.
    """
    if not question.strip():
        return "Please input a question.", "{}"

    if os.getenv("HF_TOKEN"):
        try:
            return build_research_agent(
                question=question,
                model=model,
                max_turns=max_turns,
                max_search_results=max_search_results,
                temperature=temperature,
            )
        except Exception as exc:
            # UI boundary: show the failure to the user instead of crashing the event.
            error_payload = {"error": str(exc)}
            return f"Error: {exc}", json.dumps(error_payload, ensure_ascii=False, indent=2)

    warning = (
        "HF_TOKEN is not configured in Space Secrets. "
        "Go to Settings -> Secrets -> add `HF_TOKEN`, then retry."
    )
    return warning, json.dumps({"error": warning}, ensure_ascii=False, indent=2)
# --- UI definition -----------------------------------------------------------
# Two-column layout: the left column holds the question box, action buttons,
# example prompts and the result/trace tabs; the right column holds the logo,
# the project links and the run settings.
with gr.Blocks(
    title="DeepResearch Space Starter",
    theme=gr.themes.Default(
        text_size="md",
        radius_size="md",
        spacing_size="md",
    ),
    css=CUSTOM_CSS,
) as demo:
    with gr.Row(elem_classes="layout-gap"):
        with gr.Column(scale=7):
            with gr.Group(elem_classes="section-card"):
                gr.HTML('<div class="section-title">Chat</div>')
                question = gr.Textbox(
                    show_label=False,
                    placeholder="Ask anything you want to research...",
                    lines=6,
                )
                with gr.Row():
                    run_btn = gr.Button("Run Research", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop", variant="stop", size="lg")
                    clear_btn = gr.Button("Clear", variant="secondary", size="lg")
            with gr.Group(elem_classes="section-card"):
                gr.HTML('<div class="inline-example-title">Try Examples</div>')
                gr.HTML('<div class="example-note">Click to auto-fill the chat box</div>')
                with gr.Column(elem_classes="example-buttons"):
                    # Emoji restored from mis-decoded UTF-8 byte sequences.
                    ex1_btn = gr.Button("🌏 Plan a 2-day Tokyo trip under $250 with trade-offs", variant="secondary")
                    ex2_btn = gr.Button("🤖 Compare 3 open-source coding agents with pros/cons", variant="secondary")
                    ex3_btn = gr.Button("⚖️ RAG vs fine-tuning for legal QA: which and why?", variant="secondary")
            with gr.Group(elem_classes="section-card"):
                with gr.Tabs():
                    with gr.TabItem("Result"):
                        answer = gr.Markdown(label="Final Answer")
                    with gr.TabItem("Record"):
                        trace = gr.Code(label="Execution Trace (JSON)", language="json")
        with gr.Column(scale=3, elem_classes="right-stack"):
            with gr.Group(elem_classes=["section-card", "no-frame"]):
                gr.Image(
                    value=LOGO_PATH,
                    show_label=False,
                    container=False,
                    interactive=False,
                    show_download_button=False,
                    show_fullscreen_button=False,
                    elem_classes="banner-logo-image",
                )
            with gr.Group(elem_classes=["section-card", "no-frame"]):
                # Link targets come from the *_URL environment-backed constants.
                gr.HTML(
                    f"""
                    <div class="icon-grid">
                    <a class="icon-link" href="{PAPER_URL}" target="_blank" rel="noopener noreferrer">📄 Paper</a>
                    <a class="icon-link" href="{CODE_URL}" target="_blank" rel="noopener noreferrer">💻 Code</a>
                    <a class="icon-link" href="{DATASET_URL}" target="_blank" rel="noopener noreferrer">🗂️ Dataset</a>
                    <a class="icon-link" href="{MODEL_URL}" target="_blank" rel="noopener noreferrer">🧠 Model</a>
                    </div>
                    """
                )
            with gr.Group(elem_classes="section-card"):
                gr.HTML('<div class="section-title">Settings</div>')
                model = gr.Dropdown(
                    label="Model",
                    choices=DEFAULT_FREE_MODELS,
                    # Fall back to the first listed model when DEFAULT_MODEL is not one of the choices.
                    value=DEFAULT_MODEL if DEFAULT_MODEL in DEFAULT_FREE_MODELS else DEFAULT_FREE_MODELS[0],
                    allow_custom_value=True,
                    info="You can type any model id supported by HF Inference API.",
                )
                max_turns = gr.Slider(
                    label="Max Turns", minimum=2, maximum=20, value=8, step=1, elem_classes="slider-root"
                )
                max_search_results = gr.Slider(
                    label="Search Results Per Query",
                    minimum=1,
                    maximum=10,
                    value=5,
                    step=1,
                    elem_classes="slider-root",
                )
                temperature = gr.Slider(
                    label="Temperature", minimum=0.0, maximum=1.5, value=0.4, step=0.1, elem_classes="slider-root"
                )

    # --- Event wiring ---------------------------------------------------------
    # Keep a handle on the run event so the Stop button can cancel it.
    run_event = run_btn.click(
        fn=run_ui,
        inputs=[question, model, max_turns, max_search_results, temperature],
        outputs=[answer, trace],
    )
    # Example buttons only fill the question box; they do not start a run.
    ex1_btn.click(
        fn=lambda: "Plan a 2-day food + museum itinerary in Tokyo under $250, with trade-offs.",
        inputs=[],
        outputs=[question],
    )
    ex2_btn.click(
        fn=lambda: "Find 3 open-source coding agents and compare real strengths/limitations.",
        inputs=[],
        outputs=[question],
    )
    ex3_btn.click(
        fn=lambda: "Explain whether RAG or fine-tuning is better for a legal QA assistant, and why.",
        inputs=[],
        outputs=[question],
    )
    stop_btn.click(fn=None, cancels=[run_event])
    # Reset the question, the rendered answer and the trace (to an empty JSON object).
    clear_btn.click(
        fn=lambda: ("", "", "{}"),
        inputs=[],
        outputs=[question, answer, trace],
    )

if __name__ == "__main__":
    demo.launch()