block."
name = data.get("name")
arguments = data.get("arguments", {})
if not isinstance(name, str) or not isinstance(arguments, dict):
return None, None, "Invalid tool format. Expect name(str) and arguments(dict)."
return name, arguments, None
_SEARCH_UNAVAILABLE_HINT = (
"The web-search backend is currently rate-limited or unreachable. "
"If this question can be answered confidently from your own training "
"knowledge (e.g. common product specs, historical facts, definitions), "
"please produce your best answer now inside ..., and "
"mention any value that might be out of date. Only ask the user to "
"retry later if the question truly requires a fresh web lookup."
)
# Serper credentials and endpoint, read once at import time.
# The key may be supplied under either SERPER_API_KEY or SERPER_KEY_ID (the
# latter is the env-var name used by the research repo); the first non-empty
# one wins and surrounding whitespace is trimmed. An empty string means
# "no key configured".
SERPER_API_KEY = (
    os.environ.get("SERPER_API_KEY") or os.environ.get("SERPER_KEY_ID") or ""
).strip()
# Overridable search endpoint; defaults to the public Serper search URL.
SERPER_ENDPOINT = os.environ.get(
    "SERPER_ENDPOINT", "https://google.serper.dev/search"
)
def _serper_search(query: str, max_results: int) -> Dict[str, Any]:
    """Hit the Google Serper API. Returns the same shape as `_ddg_search`.

    Serper responds in well under a second and is not subject to the 202
    Ratelimit we get from html.duckduckgo.com, so preferring it when the
    key is set cuts latency dramatically and eliminates most search
    failures on shared Space IPs.

    Args:
        query: Search string, sent as the ``q`` field of the JSON body.
        max_results: Number of organic hits to request (``num``); the
            organic list is also truncated to this length locally.

    Returns:
        ``{"ok": True, "query", "results", "cached": False, "backend":
        "serper"}`` on success, where each result is a dict with ``title``,
        ``href`` and ``body``. On any failure (transport error, HTTP error,
        undecodable or unexpectedly shaped payload, no usable rows) a dict
        with ``"ok": False``, an ``"error"`` message and empty ``"results"``.
        This function does not raise for those conditions.
    """

    def _failure(message: str) -> Dict[str, Any]:
        # Single place that builds the failure payload so every error path
        # returns exactly the same keys.
        return {
            "ok": False,
            "query": query,
            "error": message,
            "results": [],
            "backend": "serper",
        }

    try:
        resp = requests.post(
            SERPER_ENDPOINT,
            headers={
                "X-API-KEY": SERPER_API_KEY,
                "Content-Type": "application/json",
            },
            json={"q": query, "num": max_results},
            timeout=15,
        )
        resp.raise_for_status()
        data = resp.json()
    except Exception as exc:
        # Deliberately broad: callers rely on a structured result instead of
        # an exception, whatever went wrong on the wire.
        return _failure(f"Serper error: {type(exc).__name__}: {exc}")

    # A valid-JSON but non-object body (list, string, null) would otherwise
    # blow up on `.get` below, outside the try block.
    if not isinstance(data, dict):
        return _failure(
            f"Serper error: unexpected {type(data).__name__} payload"
        )

    organic = data.get("organic")
    if not isinstance(organic, list):
        organic = []
    rows: List[Dict[str, str]] = [
        {
            "title": item.get("title", ""),
            "href": item.get("link", ""),
            "body": item.get("snippet", ""),
        }
        for item in organic[:max_results]
        if isinstance(item, dict)  # skip malformed entries instead of crashing
    ]

    # Fold in the answer box when present; it often carries the exact fact
    # the model is looking for in a compact form. It is placed first, so the
    # result list may hold max_results + 1 rows. (The knowledge graph is not
    # read here.)
    answer_box = data.get("answerBox")
    if isinstance(answer_box, dict) and answer_box:
        rows.insert(
            0,
            {
                "title": answer_box.get("title", "Answer box"),
                "href": answer_box.get("link", ""),
                "body": answer_box.get("snippet")
                or answer_box.get("answer")
                or "",
            },
        )

    if not rows:
        return _failure("Serper returned no organic results")
    return {
        "ok": True,
        "query": query,
        "results": rows,
        "cached": False,
        "backend": "serper",
    }
def _ddg_search(query: str, max_results: int) -> Dict[str, Any]:
    """Fallback search path that scrapes DuckDuckGo (rate-limits on shared IPs).

    Makes up to two attempts, pausing 1.5 seconds after the first failure.
    Returns an ``ok: True`` payload with the normalised rows as soon as an
    attempt completes, otherwise an ``ok: False`` payload describing the last
    exception. Exceptions from the DuckDuckGo client are captured, not raised.
    """
    failure: Optional[BaseException] = None
    for attempt_no in range(2):
        try:
            with DDGS() as client:
                hits: List[Dict[str, str]] = [
                    {
                        "title": hit.get("title", ""),
                        "href": hit.get("href", ""),
                        "body": hit.get("body", ""),
                    }
                    for hit in client.text(query, max_results=max_results)
                ]
        except Exception as exc:
            failure = exc
            # Back off only before the retry, not after the final attempt.
            if attempt_no == 0:
                time.sleep(1.5)
        else:
            return {
                "ok": True,
                "query": query,
                "results": hits,
                "cached": False,
                "backend": "duckduckgo",
            }

    if failure:
        reason = f"{type(failure).__name__}: {failure}"
    else:
        reason = "unknown error"
    return {
        "ok": False,
        "query": query,
        "error": f"DuckDuckGo unavailable ({reason}).",
        "results": [],
        "backend": "duckduckgo",
    }
def _run_search_single(query: str, max_results: int) -> Dict[str, Any]:
"""Run one search query, preferring Serper when the key is set.
Returns a structured dict on both success and failure; never raises.
Order of preference:
1. Google Serper (fast, no scraping, requires `SERPER_API_KEY` /
`SERPER_KEY_ID`).
2. DuckDuckGo HTML backend (free, but rate-limits on shared Space IPs).
3. Graceful `ok: False` payload with a hint that tells the agent to
answer from its own knowledge if it reasonably can.
"""
if not query.strip():
return {"ok": False, "error": "Search query cannot be empty."}
cache_key = f"{query.strip().lower()}::{max_results}"
if cache_key in SEARCH_CACHE:
return {**SEARCH_CACHE[cache_key], "cached": True}
tried: List[Dict[str, Any]] = []
if SERPER_API_KEY:
serper_result = _serper_search(query, max_results)
if serper_result.get("ok"):
SEARCH_CACHE[cache_key] = serper_result
return serper_result
tried.append(serper_result)
ddg_result = _ddg_search(query, max_results)
if ddg_result.get("ok"):
SEARCH_CACHE[cache_key] = ddg_result
return ddg_result
tried.append(ddg_result)
# Both backends failed (or no Serper key and DDG rate-limited).
errors = "; ".join(
f"{r.get('backend', 'unknown')}: {r.get('error', 'no results')}"
for r in tried
)
return {
"ok": False,
"query": query,
"error": f"All search backends failed ({errors}).",
"results": [],
"hint": _SEARCH_UNAVAILABLE_HINT,
}
def run_search(query: Union[str, List[str]], max_results: int = 5) -> Dict[str, Any]:
    """Run one query, or a batch of queries, via `_run_search_single`.

    QUEST's schema passes `query` as an array of strings, while the simpler
    starter schema used a single string; both shapes are accepted.

    For a list, entries that are not non-blank strings are skipped and the
    return value is ``{"ok": True, "queries": <the list as given>,
    "results": [<one result dict per query actually run>]}``. For any other
    value, the stripped string form is searched and that single result dict
    is returned directly.
    """
    if not isinstance(query, list):
        return _run_search_single(str(query or "").strip(), max_results)

    outcomes: List[Dict[str, Any]] = [
        _run_search_single(entry, max_results)
        for entry in query
        if isinstance(entry, str) and entry.strip()
    ]
    return {"ok": True, "queries": query, "results": outcomes}
def _clean_html_to_text(html: str, max_chars: int) -> str:
    """Reduce an HTML document to plain text, capped at `max_chars` characters.

    ``<script>``, ``<style>`` and ``<noscript>`` elements are removed before
    extraction, and every run of whitespace in the extracted text is
    collapsed to a single space.
    """
    document = BeautifulSoup(html, "html.parser")
    for unwanted in document.find_all(["script", "style", "noscript"]):
        unwanted.decompose()
    extracted = document.get_text(separator=" ", strip=True)
    collapsed = re.sub(r"\s+", " ", extracted)
    return collapsed[:max_chars]
def _run_visit_single(url: str, max_chars: int, goal: str = "") -> Dict[str, Any]:
if not url.strip():
return {"ok": False, "error": "URL cannot be empty."}
cache_key = f"{url.strip()}::{max_chars}"
if cache_key in VISIT_CACHE:
return {**VISIT_CACHE[cache_key], "cached": True, "goal": goal}
try:
resp = requests.get(
url,
timeout=20,
headers={"User-Agent": "Mozilla/5.0 (compatible; DeepResearchSpace/1.0)"},
)
resp.raise_for_status()
content_type = resp.headers.get("content-type", "")
if "text/html" in content_type or " Dict[str, Any]:
"""Fetches one or more URLs. Accepts string or list (QUEST schema)."""
if isinstance(url, list):
sub_results: List[Dict[str, Any]] = []
for u in url:
if not isinstance(u, str) or not u.strip():
continue
sub_results.append(_run_visit_single(u, max_chars, goal))
return {"ok": True, "goal": goal, "results": sub_results}
return _run_visit_single(str(url or "").strip(), max_chars, goal)
def _build_client_for_model(model: str) -> Tuple[InferenceClient, str, List[str]]:
    """Build the inference client for `model`.

    Returns a ``(client, primary_model_id, fallback_model_ids)`` triple.

    When the user picked the Quest model and `QUEST_BASE_URL` is configured,
    the client targets that dedicated endpoint (120 s timeout), the primary
    id is `QUEST_ENDPOINT_MODEL`, and there are no fallbacks. In every other
    case the client uses the shared HF Inference API (60 s timeout) and the
    fallbacks are the entries of `FREE_FALLBACK_MODELS` other than `model`.
    The token is read from the `HF_TOKEN` environment variable.
    """
    hf_token = os.getenv("HF_TOKEN")

    if not (model == QUEST_MODEL_ID and QUEST_BASE_URL):
        shared_client = InferenceClient(token=hf_token, timeout=60)
        alternates = [
            candidate for candidate in FREE_FALLBACK_MODELS if candidate != model
        ]
        return shared_client, model, alternates

    dedicated_client = InferenceClient(
        base_url=QUEST_BASE_URL,
        token=hf_token,
        timeout=120,
    )
    return dedicated_client, QUEST_ENDPOINT_MODEL, []
def call_model(
    client: InferenceClient,
    messages: List[Dict[str, str]],
    preferred_model: str,
    candidate_models: List[str],
    temperature: float,
    max_new_tokens: int,
) -> Tuple[str, str]:
    """Request a chat completion, falling back across candidate models.

    Models are tried in order: `preferred_model` first, then each entry of
    `candidate_models`, with duplicates and empty ids removed. The first
    model that produces a completion wins.

    Args:
        client: Object exposing ``chat_completion(model=, messages=,
            temperature=, max_tokens=)``.
        messages: Chat history passed through unchanged.
        preferred_model: Model id to try first.
        candidate_models: Fallback ids tried in the given order.
        temperature: Sampling temperature forwarded to the client.
        max_new_tokens: Forwarded to the client as ``max_tokens``.

    Returns:
        ``(content, model_name)``: the first choice's message content
        (``""`` when it is empty or ``None``) and the id of the model that
        produced it.

    Raises:
        RuntimeError: If every candidate failed (or there were none). The
            last underlying exception is attached as ``__cause__``.
    """
    # dict.fromkeys keeps first-seen order while dropping duplicates.
    model_order: List[str] = [
        m for m in dict.fromkeys([preferred_model, *candidate_models]) if m
    ]

    last_error: Optional[Exception] = None
    for model_name in model_order:
        try:
            completion = client.chat_completion(
                model=model_name,
                messages=messages,
                temperature=temperature,
                max_tokens=max_new_tokens,
            )
            # Kept inside the try: a malformed completion (e.g. no choices)
            # counts as a failure of this model, so the next one is tried.
            return completion.choices[0].message.content or "", model_name
        except Exception as exc:
            last_error = exc

    # Chain the root cause so the traceback shows why the last model failed.
    raise RuntimeError(
        f"All model candidates failed. Last error: {last_error}"
    ) from last_error
def _render_progress(
lines: List[str],
used_model: str,
question: str,
) -> str:
"""Render the in-progress status view that replaces the Markdown panel
while the agent is still running, so the user is not staring at a blank
box for the 20-60 seconds a full Quest-4B research run can take."""
header = (
f"### ⏳ Researching…\n\n"
f"**Model:** `{used_model}` \n"
f"**Question:** {question.strip()[:200]}"
)
if not lines:
body = "_Starting agent…_"
else:
body = "\n".join(f"- {line}" for line in lines)
return f"{header}\n\n{body}"
def _trace_to_json(state: "AgentState", used_model: str) -> str:
return json.dumps(
{
"used_model": used_model,
"searched_queries": state.searched_queries,
"visited_urls": state.visited_urls,
"trusted_notes": state.trusted_notes[-10:],
"trace": state.trace,
},
ensure_ascii=False,
indent=2,
)
def _coerce_int(value: Any, default: int) -> int:
    """Return ``int(value)``, or ``int(default)`` when `value` is not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return int(default)


def build_research_agent(
    question: str,
    model: str,
    max_turns: int,
    max_search_results: int,
    temperature: float,
):
    """Run the ReAct research loop as a generator.

    Each `yield` emits a `(markdown_for_answer_panel, json_for_record_panel)`
    tuple. Intermediate yields show progress so that Gradio streams the
    status lines into the UI as work happens. The last yield contains the
    final answer and the final trace.

    Args:
        question: The user's research question; sent as the first user turn.
        model: Model id chosen in the UI, resolved via
            `_build_client_for_model`.
        max_turns: Upper bound on the number of model calls.
        max_search_results: Hits per query used when a search tool call does
            not supply a usable `max_results` (always clamped to 1-10).
        temperature: Sampling temperature forwarded to `call_model`.

    Raises:
        RuntimeError: Propagated from `call_model` when every candidate
            model fails.
    """
    client, primary_model, fallback_models = _build_client_for_model(model)
    # Display label: the real HF repo id is nicer than the TGI shim name.
    display_primary = model if (model == QUEST_MODEL_ID) else primary_model
    state = AgentState()
    used_model = display_primary
    status_lines: List[str] = []

    def _emit():
        """Return the current progress snapshot; the caller yields it to Gradio."""
        return (
            _render_progress(status_lines, used_model, question),
            _trace_to_json(state, used_model),
        )

    messages: List[Dict[str, str]] = [
        {"role": "system", "content": build_system_prompt()},
        {"role": "user", "content": question},
    ]
    final_answer: Optional[str] = None
    status_lines.append("🚀 Starting research agent")
    yield _emit()

    for turn in range(1, max_turns + 1):
        # Every third turn, replay the most recent notes so the model does
        # not repeat work it has already done.
        if state.trusted_notes and turn > 1 and turn % 3 == 0:
            summary_lines = "\n".join(f"- {n}" for n in state.trusted_notes[-6:])
            messages.append(
                {
                    "role": "user",
                    "content": f"RESEARCH STATE SUMMARY\n{summary_lines}\nUse this summary to avoid repeating work.",
                }
            )
        status_lines.append(f"🧠 turn {turn}: thinking…")
        yield _emit()
        t0 = time.time()
        raw_output, endpoint_model = call_model(
            client=client,
            messages=messages,
            preferred_model=primary_model,
            candidate_models=fallback_models,
            temperature=temperature,
            max_new_tokens=int(os.getenv("QUEST_MAX_NEW_TOKENS", "4096")),
        )
        dt = time.time() - t0
        model_output = raw_output
        # Preserve the human-friendly model id for the trace even if the
        # endpoint ignores the "model" param and returns the TGI shim name.
        used_model = (
            display_primary
            if endpoint_model == primary_model == QUEST_ENDPOINT_MODEL
            else endpoint_model
        )
        messages.append({"role": "assistant", "content": model_output})
        state.trace.append({"turn": turn, "assistant": model_output, "elapsed_s": round(dt, 2)})
        # Overwrite the "thinking…" line for this turn with the timing.
        status_lines[-1] = f"🧠 turn {turn}: model reply in {dt:.1f}s"
        yield _emit()

        extracted_answer = extract_answer(model_output)
        if extracted_answer:
            final_answer = extracted_answer
            status_lines.append("✍️ writing final answer")
            yield _emit()
            break

        tool_name, tool_args, tool_err = parse_tool_call(model_output)
        if tool_err:
            tool_response = {"ok": False, "error": tool_err}
            status_lines.append(f"⚠️ turn {turn}: malformed tool call — {tool_err}")
            yield _emit()
        elif not tool_name:
            # No explicit tool call and no final answer: force finalization.
            # IMPORTANT: do not write the literal characters `...`
            # here. Some models (notably the Qwen3 family that Quest-4B is
            # built on) will echo the template verbatim, which means the
            # extracted answer ends up being the three-dot placeholder `...`
            # and the user sees an empty-looking result.
            messages.append(
                {
                    "role": "user",
                    "content": (
                        "You did not call a tool and did not produce a final "
                        "answer. Please now write your best final answer, "
                        "wrapped between an opening tag and a "
                        "closing tag. Put the real answer text "
                        "between those tags; do not write a literal ellipsis "
                        "or other placeholder. If the question asks for "
                        "tabular data, use GitHub-Flavored Markdown pipe "
                        "tables (`| col1 | col2 |` + `|---|---|`) and put a "
                        "blank line before the first row so the table renders."
                    ),
                }
            )
            status_lines.append(f"🙃 turn {turn}: model stalled; asking for an answer")
            yield _emit()
            # Nothing to feed back as a tool response; go straight to the next turn.
            continue
        else:
            if tool_name == "search":
                raw_query = tool_args.get("query", "")
                queries: List[str]
                if isinstance(raw_query, list):
                    queries = [str(q).strip() for q in raw_query if str(q).strip()]
                else:
                    queries = [str(raw_query).strip()] if str(raw_query).strip() else []
                # The value comes from model output, so it may be missing,
                # null, or non-numeric; fall back to the UI setting rather
                # than aborting the whole run.
                max_results = _coerce_int(
                    tool_args.get("max_results", max_search_results),
                    max_search_results,
                )
                max_results = max(1, min(max_results, 10))
                queries_preview = ", ".join(f"`{q}`" for q in queries) or "_(empty)_"
                status_lines.append(f"🔍 turn {turn}: searching {queries_preview}")
                yield _emit()
                per_query: List[Dict[str, Any]] = []
                backend_labels: List[str] = []
                hits_total = 0
                for q in queries:
                    # A query repeated within this run is answered with a
                    # short note instead of hitting the backends again.
                    if q in state.searched_query_set:
                        per_query.append({
                            "ok": True,
                            "query": q,
                            "cached": True,
                            "note": "Already searched; reusing cached result.",
                            "results": [],
                        })
                        backend_labels.append("cache")
                        continue
                    state.searched_queries.append(q)
                    state.searched_query_set.add(q)
                    single = _run_search_single(q, max_results)
                    per_query.append(single)
                    backend_labels.append(single.get("backend", "unknown"))
                    if single.get("ok"):
                        hits_total += len(single.get("results", []))
                        first_titles = [r.get("title", "") for r in single.get("results", [])[:2]]
                        if first_titles:
                            state.trusted_notes.append(
                                f"Searched '{q}' and found leads: {', '.join(t for t in first_titles if t)}"
                            )
                    else:
                        status_lines.append(
                            f"⚠️ search failed on `{q}` via {single.get('backend', 'unknown')}: "
                            f"{single.get('error', 'no results')}"
                        )
                # A single query is returned unwrapped; several are bundled.
                tool_response = (
                    per_query[0]
                    if len(per_query) == 1
                    else {"ok": True, "queries": queries, "results": per_query}
                )
                unique_backends = sorted(set(backend_labels))
                backend_str = "/".join(unique_backends) if unique_backends else "?"
                status_lines.append(
                    f"✅ turn {turn}: got {hits_total} hit(s) via {backend_str}"
                )
                yield _emit()
            elif tool_name == "visit":
                raw_url = tool_args.get("url", "")
                urls: List[str]
                if isinstance(raw_url, list):
                    urls = [str(u).strip() for u in raw_url if str(u).strip()]
                else:
                    urls = [str(raw_url).strip()] if str(raw_url).strip() else []
                goal = str(tool_args.get("goal", "")).strip()
                # Same defensive coercion as for max_results above.
                max_chars = _coerce_int(tool_args.get("max_chars", 6000), 6000)
                max_chars = max(500, min(max_chars, 20000))
                urls_preview = ", ".join(f"`{u[:60]}`" for u in urls) or "_(empty)_"
                status_lines.append(f"🌐 turn {turn}: visiting {urls_preview}")
                yield _emit()
                per_url: List[Dict[str, Any]] = []
                visit_ok = 0
                for u in urls:
                    # A URL repeated within this run is not fetched again.
                    if u in state.visited_url_set:
                        per_url.append({
                            "ok": True,
                            "url": u,
                            "cached": True,
                            "note": "Already visited; reusing cached result.",
                        })
                        visit_ok += 1
                        continue
                    state.visited_urls.append(u)
                    state.visited_url_set.add(u)
                    single = _run_visit_single(u, max_chars, goal)
                    per_url.append(single)
                    if single.get("ok"):
                        visit_ok += 1
                        snippet = str(single.get("content", ""))[:180]
                        if snippet:
                            state.trusted_notes.append(
                                f"Visited {u} and extracted key context: {snippet}"
                            )
                tool_response = (
                    per_url[0]
                    if len(per_url) == 1
                    else {"ok": True, "goal": goal, "results": per_url}
                )
                status_lines.append(
                    f"✅ turn {turn}: read {visit_ok}/{len(urls)} page(s)"
                )
                yield _emit()
            else:
                tool_response = {"ok": False, "error": f"Unknown tool: {tool_name}"}
                status_lines.append(f"⚠️ turn {turn}: unknown tool `{tool_name}`")
                yield _emit()

        # Record the tool outcome and hand it back to the model as the next
        # user message (also reached for malformed tool calls).
        state.trace.append({"turn": turn, "tool": tool_name, "tool_response": tool_response})
        messages.append(
            {
                "role": "user",
                "content": TOOL_RESPONSE_TEMPLATE.format(
                    payload=json.dumps(tool_response, ensure_ascii=False)
                ),
            }
        )

    if final_answer is None:
        final_answer = (
            "I could not finish a complete research answer within the configured turns. "
            "Try increasing max turns or switching to a stronger model."
        )
    else:
        final_answer = ensure_markdown_table_blank_lines(final_answer)
    citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
    final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"
    if citations:
        final_answer = f"{final_answer}\n\n### Visited Sources\n{citations}"
    trace_text = _trace_to_json(state, used_model)
    yield (final_answer, trace_text)
def run_ui(
    question: str,
    model: str,
    max_turns: int,
    max_search_results: int,
    temperature: float,
):
    """Generator wired to the Run button: validate inputs, then stream the agent.

    Yields ``(answer_markdown, trace_json)`` pairs. Before starting the agent
    it checks, in order, for a blank question, a missing `HF_TOKEN`, and the
    Quest model being selected without `QUEST_BASE_URL`; each of these yields
    one explanatory message and stops. Any exception raised while the agent
    runs is reported as a final ``Error: ...`` pair instead of propagating.
    """

    def _warning_pair(message: str):
        # The same text goes to the answer panel and, as JSON, to the trace panel.
        return message, json.dumps({"error": message}, ensure_ascii=False, indent=2)

    if not question.strip():
        yield "Please input a question.", "{}"
        return

    if not os.getenv("HF_TOKEN"):
        yield _warning_pair(
            "HF_TOKEN is not configured in Space Secrets. "
            "Go to Settings -> Secrets -> add `HF_TOKEN`, then retry."
        )
        return

    if model == QUEST_MODEL_ID and not QUEST_BASE_URL:
        yield _warning_pair(
            f"`{QUEST_MODEL_ID}` is private and not available via the free HF Inference API. "
            "Create a dedicated HF Inference Endpoint for it (https://ui.endpoints.huggingface.co/), "
            "then set `QUEST_BASE_URL` in Space Secrets to the endpoint's `/v1/` URL. "
            "In the meantime you can pick one of the open-weights models in the dropdown."
        )
        return

    try:
        agent_stream = build_research_agent(
            question=question,
            model=model,
            max_turns=max_turns,
            max_search_results=max_search_results,
            temperature=temperature,
        )
        for answer_md, trace_json in agent_stream:
            yield answer_md, trace_json
    except Exception as exc:
        yield f"Error: {exc}", json.dumps({"error": str(exc)}, ensure_ascii=False, indent=2)
# Starter prompts offered in the UI. Each entry records the kind of query it
# represents ("category"), an emoji used in the button label ("icon"), and
# the prompt itself ("text").
EXAMPLES = [
    {"category": category, "icon": icon, "text": text}
    for category, icon, text in (
        (
            "Fixed facts",
            "🎯",
            "Who wrote the novel 1984, and when was it first published?",
        ),
        (
            "Time-varying",
            "📈",
            "Who is the current CEO of Tesla, and what is the company's latest stock price?",
        ),
        (
            "Multi-constraints",
            "🧩",
            "Find a 2-day Tokyo itinerary under $250 focused on museums and vegetarian food.",
        ),
        (
            "Long-form research report",
            "📚",
            "Write a short guide comparing electric cars vs hybrid cars for a daily commuter, covering cost, range, and maintenance.",
        ),
    )
]
def _example_label(ex: Dict[str, str]) -> str:
return f"{ex['icon']} {ex['category']} — {ex['text']}"
with gr.Blocks(
title="Quest · Deep Research by OSU NLP",
theme=APP_THEME,
css=CUSTOM_CSS,
fill_width=True,
) as demo:
# --- Quest-style header (Q mark + title + byline) ---
gr.HTML(
"""
"""
)
# --- Main two-column layout ---
with gr.Row(elem_classes="layout-gap"):
with gr.Column(scale=6, min_width=420):
with gr.Group(elem_classes="section-card"):
gr.HTML(
'Ask the agent
'
'What can I research for you?
'
)
question = gr.Textbox(
show_label=False,
placeholder="Ask anything you want to research in depth...",
lines=6,
)
with gr.Row(elem_classes="action-row"):
run_btn = gr.Button("Run Research", variant="primary", size="lg")
stop_btn = gr.Button("Stop", variant="stop", size="lg")
clear_btn = gr.Button("Clear", variant="secondary", size="lg")
with gr.Group(elem_classes="section-card"):
gr.HTML(
'Try examples
'
'Each prompt shows the kind of query it represents. Click one to auto-fill.
'
)
with gr.Column(elem_classes="example-buttons"):
example_buttons = [
gr.Button(_example_label(ex), variant="secondary", elem_classes="example-btn")
for ex in EXAMPLES
]
with gr.Group(elem_classes="section-card"):
gr.HTML('Output
')
with gr.Tabs():
with gr.TabItem("Result"):
answer = gr.Markdown(label="Final Answer")
with gr.TabItem("Record"):
trace = gr.Code(label="Execution Trace (JSON)", language="json")
with gr.Column(scale=4, min_width=340, elem_classes="right-stack"):
with gr.Group(elem_classes="section-card"):
gr.HTML(
f"""
Open release
"""
)
with gr.Group(elem_classes="section-card"):
gr.HTML('Settings
')
model = gr.Dropdown(
label="Model",
choices=DEFAULT_MODEL_CHOICES,
value=DEFAULT_MODEL if DEFAULT_MODEL in DEFAULT_MODEL_CHOICES else DEFAULT_MODEL_CHOICES[0],
allow_custom_value=True,
)
max_turns = gr.Slider(
label="Max Turns",
minimum=2,
maximum=20,
value=6,
step=1,
)
max_search_results = gr.Slider(
label="Search Results Per Query",
minimum=1,
maximum=10,
value=5,
step=1,
)
temperature = gr.Slider(
label="Temperature",
minimum=0.0,
maximum=1.5,
value=0.4,
step=0.1,
)
gr.HTML(
"""
"""
)
run_event = run_btn.click(
fn=run_ui,
inputs=[question, model, max_turns, max_search_results, temperature],
outputs=[answer, trace],
)
for btn, ex in zip(example_buttons, EXAMPLES):
btn.click(
fn=(lambda text=ex["text"]: text),
inputs=[],
outputs=[question],
)
stop_btn.click(fn=None, cancels=[run_event])
clear_btn.click(
fn=lambda: ("", "", "{}"),
inputs=[],
outputs=[question, answer, trace],
)
# Script entry point: launch the Gradio app (`demo`) only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    demo.launch()