block."
name = data.get("name")
arguments = data.get("arguments", {})
if not isinstance(name, str) or not isinstance(arguments, dict):
return None, None, "Invalid tool format. Expect name(str) and arguments(dict)."
return name, arguments, None
def _run_search_single(query: str, max_results: int) -> Dict[str, Any]:
    """Run one DuckDuckGo text search and memoize the successful result.

    Args:
        query: Search text. Surrounding whitespace and letter case are
            ignored when building the cache key.
        max_results: Maximum number of hits requested from DuckDuckGo.

    Returns:
        On success ``{"ok": True, "query": ..., "results": [...], "cached": bool}``
        where every result has ``title``, ``href`` and ``body`` keys.
        ``{"ok": False, "error": ...}`` when the query is blank or the search
        backend raises.
    """
    normalized = query.strip()
    if not normalized:
        return {"ok": False, "error": "Search query cannot be empty."}

    # max_results is part of the key because it changes the payload size.
    cache_key = f"{normalized.lower()}::{max_results}"
    if cache_key in SEARCH_CACHE:
        return {**SEARCH_CACHE[cache_key], "cached": True}

    try:
        with DDGS() as ddgs:
            rows: List[Dict[str, str]] = [
                {
                    "title": item.get("title", ""),
                    "href": item.get("href", ""),
                    "body": item.get("body", ""),
                }
                for item in ddgs.text(query, max_results=max_results)
            ]
    except Exception as exc:
        # Tool boundary: surface backend/network failures as an ``ok: False``
        # payload (callers already branch on ``ok``) instead of raising.
        # Failures are deliberately not cached so a retry can succeed.
        return {"ok": False, "query": query, "error": f"Search failed: {exc}"}

    payload = {"ok": True, "query": query, "results": rows, "cached": False}
    SEARCH_CACHE[cache_key] = payload
    return payload
def run_search(query: Union[str, List[str]], max_results: int = 5) -> Dict[str, Any]:
    """Runs one or more queries through DuckDuckGo.

    QUEST's schema passes `query` as an array of strings, while the simpler
    starter schema used a single string. We accept both shapes.

    Args:
        query: A single query string, or a list of query strings. Non-string
            and blank list entries are ignored.
        max_results: Maximum number of hits requested per query.

    Returns:
        For a string: the payload of ``_run_search_single``.
        For a list: ``{"ok": True, "queries": [...], "results": [...]}`` where
        ``queries[i]`` produced ``results[i]``, or an ``ok: False`` payload
        when the list holds no usable query.
    """
    if not isinstance(query, list):
        return _run_search_single(str(query or "").strip(), max_results)

    usable = [q for q in query if isinstance(q, str) and q.strip()]
    if not usable:
        # Same mistake as an empty single string, so report it the same way
        # instead of returning a successful-looking empty result.
        return {"ok": False, "error": "Search query cannot be empty."}

    sub_results: List[Dict[str, Any]] = [
        _run_search_single(q, max_results) for q in usable
    ]
    # Echo only the queries that were actually run so the two lists align.
    return {"ok": True, "queries": usable, "results": sub_results}
def _clean_html_to_text(html: str, max_chars: int) -> str:
    """Reduce an HTML document to plain text, capped at ``max_chars`` characters.

    ``script``, ``style`` and ``noscript`` elements are removed before the
    text is extracted, and every run of whitespace becomes a single space.
    """
    document = BeautifulSoup(html, "html.parser")
    for unwanted in document.find_all(["script", "style", "noscript"]):
        unwanted.decompose()
    visible = document.get_text(separator=" ", strip=True)
    collapsed = re.sub(r"\s+", " ", visible)
    return collapsed[:max_chars]
def _run_visit_single(url: str, max_chars: int, goal: str = "") -> Dict[str, Any]:
if not url.strip():
return {"ok": False, "error": "URL cannot be empty."}
cache_key = f"{url.strip()}::{max_chars}"
if cache_key in VISIT_CACHE:
return {**VISIT_CACHE[cache_key], "cached": True, "goal": goal}
try:
resp = requests.get(
url,
timeout=20,
headers={"User-Agent": "Mozilla/5.0 (compatible; DeepResearchSpace/1.0)"},
)
resp.raise_for_status()
content_type = resp.headers.get("content-type", "")
if "text/html" in content_type or " Dict[str, Any]:
"""Fetches one or more URLs. Accepts string or list (QUEST schema)."""
if isinstance(url, list):
sub_results: List[Dict[str, Any]] = []
for u in url:
if not isinstance(u, str) or not u.strip():
continue
sub_results.append(_run_visit_single(u, max_chars, goal))
return {"ok": True, "goal": goal, "results": sub_results}
return _run_visit_single(str(url or "").strip(), max_chars, goal)
def _build_client_for_model(model: str) -> Tuple[InferenceClient, str, List[str]]:
    """Pick the inference client and model ids to use for ``model``.

    Returns a ``(client, primary_model_id, fallback_model_ids)`` triple.
    The Quest model, when ``QUEST_BASE_URL`` is set, gets a client bound to
    that base URL with no fallbacks. Every other case gets a default client
    plus the entries of ``FREE_FALLBACK_MODELS`` other than ``model``.
    """
    hf_token = os.getenv("HF_TOKEN")

    if not (model == QUEST_MODEL_ID and QUEST_BASE_URL):
        shared_client = InferenceClient(token=hf_token, timeout=60)
        alternatives = [
            candidate for candidate in FREE_FALLBACK_MODELS if candidate != model
        ]
        return shared_client, model, alternatives

    dedicated_client = InferenceClient(
        base_url=QUEST_BASE_URL,
        token=hf_token,
        timeout=120,
    )
    return dedicated_client, QUEST_ENDPOINT_MODEL, []
def call_model(
    client: InferenceClient,
    messages: List[Dict[str, str]],
    preferred_model: str,
    candidate_models: List[str],
    temperature: float,
    max_new_tokens: int,
) -> Tuple[str, str]:
    """Request a chat completion, trying each model in turn until one succeeds.

    Args:
        client: Client whose ``chat_completion`` method is called.
        messages: Chat history sent with every attempt.
        preferred_model: Model id tried first.
        candidate_models: Model ids tried afterwards, in order.
        temperature: Sampling temperature forwarded to the client.
        max_new_tokens: Forwarded to the client as ``max_tokens``.

    Returns:
        ``(text, model_name)``: the first choice's message content (``""`` if
        it is empty/None) and the model id that produced it.

    Raises:
        RuntimeError: If no model id was supplied, or every attempt failed.
            In the latter case the last failure is attached as ``__cause__``.
    """
    # dict.fromkeys drops duplicates and falsy ids while keeping first-seen order.
    model_order: List[str] = list(
        dict.fromkeys(m for m in [preferred_model, *candidate_models] if m)
    )
    if not model_order:
        raise RuntimeError("No model candidates were provided.")

    last_error: Optional[Exception] = None
    for model_name in model_order:
        try:
            completion = client.chat_completion(
                model=model_name,
                messages=messages,
                temperature=temperature,
                max_tokens=max_new_tokens,
            )
            # Kept inside the try: a malformed response also triggers fallback.
            return completion.choices[0].message.content or "", model_name
        except Exception as exc:
            # Deliberately broad: any failure moves on to the next candidate.
            last_error = exc

    raise RuntimeError(
        f"All model candidates failed. Last error: {last_error}"
    ) from last_error
def _coerce_int(value: Any, default: int) -> int:
    """Return ``value`` converted with ``int()``, or ``default`` if that fails."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return default


def build_research_agent(
    question: str,
    model: str,
    max_turns: int,
    max_search_results: int,
    temperature: float,
) -> Tuple[str, str]:
    """Drive the search/visit tool loop for ``question`` and format the outcome.

    Each turn asks the model for output, stops if ``extract_answer`` finds a
    final answer, and otherwise runs the parsed ``search`` or ``visit`` tool
    call and feeds its JSON result back as the next user message.

    Args:
        question: The user's research question.
        model: Model id chosen by the caller; resolved to a client and
            fallback list by ``_build_client_for_model``.
        max_turns: Maximum number of model calls before giving up.
        max_search_results: Results per query when the tool call does not
            give a usable ``max_results`` (always clamped to 1..10).
        temperature: Sampling temperature forwarded to the model.

    Returns:
        ``(final_answer, trace_text)``: the Markdown answer, prefixed with the
        model used and followed by the visited URLs (if any), and a JSON dump
        of the run (queries, URLs, recent notes, per-turn trace).

    Raises:
        RuntimeError: Propagated from ``call_model`` when no model responds.
    """
    client, primary_model, fallback_models = _build_client_for_model(model)
    # Display label: the real HF repo id is nicer than the TGI shim name.
    display_primary = model if (model == QUEST_MODEL_ID) else primary_model
    state = AgentState()
    used_model = display_primary
    # Loop-invariant, so read the environment once rather than on every turn.
    max_new_tokens = int(os.getenv("QUEST_MAX_NEW_TOKENS", "4096"))
    default_max_results = int(max_search_results)

    messages: List[Dict[str, str]] = [
        {"role": "system", "content": build_system_prompt()},
        {"role": "user", "content": question},
    ]
    final_answer: Optional[str] = None

    for turn in range(1, max_turns + 1):
        # Every third turn, remind the model of the most recent notes.
        if state.trusted_notes and turn > 1 and turn % 3 == 0:
            summary_lines = "\n".join(f"- {n}" for n in state.trusted_notes[-6:])
            messages.append(
                {
                    "role": "user",
                    "content": f"RESEARCH STATE SUMMARY\n{summary_lines}\nUse this summary to avoid repeating work.",
                }
            )

        model_output, endpoint_model = call_model(
            client=client,
            messages=messages,
            preferred_model=primary_model,
            candidate_models=fallback_models,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
        )
        # Preserve the human-friendly model id for the trace even if the
        # endpoint ignores the "model" param and returns the TGI shim name.
        used_model = (
            display_primary
            if endpoint_model == primary_model == QUEST_ENDPOINT_MODEL
            else endpoint_model
        )
        messages.append({"role": "assistant", "content": model_output})
        state.trace.append({"turn": turn, "assistant": model_output})

        extracted_answer = extract_answer(model_output)
        if extracted_answer:
            final_answer = extracted_answer
            break

        tool_name, tool_args, tool_err = parse_tool_call(model_output)
        if tool_err:
            tool_response = {"ok": False, "error": tool_err}
        elif not tool_name:
            # No explicit tool call and no final answer: force finalization.
            # IMPORTANT: do not put a literal filled-in answer template (tags
            # around `...`) in this prompt. Some models (notably the Qwen3
            # family that Quest-4B is built on) echo the template verbatim,
            # so the extracted answer becomes the three-dot placeholder and
            # the user sees an empty-looking result.
            # NOTE(review): the text below says "an opening tag and a closing
            # tag" without naming the tags — confirm the tag names were not
            # lost from this string.
            messages.append(
                {
                    "role": "user",
                    "content": (
                        "You did not call a tool and did not produce a final "
                        "answer. Please now write your best final answer, "
                        "wrapped between an opening tag and a "
                        "closing tag. Put the real answer text "
                        "between those tags; do not write a literal ellipsis "
                        "or other placeholder. If the question asks for "
                        "tabular data, use GitHub-Flavored Markdown pipe "
                        "tables (`| col1 | col2 |` + `|---|---|`) and put a "
                        "blank line before the first row so the table renders."
                    ),
                }
            )
            continue
        elif tool_name == "search":
            raw_query = tool_args.get("query", "")
            queries: List[str]
            if isinstance(raw_query, list):
                queries = [str(q).strip() for q in raw_query if str(q).strip()]
            else:
                queries = [str(raw_query).strip()] if str(raw_query).strip() else []

            # Tool arguments come from the model; a non-numeric value must not
            # abort the whole run, so fall back to the configured default.
            max_results = _coerce_int(
                tool_args.get("max_results", default_max_results),
                default_max_results,
            )
            max_results = max(1, min(max_results, 10))

            per_query: List[Dict[str, Any]] = []
            for q in queries:
                if q in state.searched_query_set:
                    per_query.append({
                        "ok": True,
                        "query": q,
                        "cached": True,
                        "note": "Already searched; reusing cached result.",
                        "results": [],
                    })
                    continue
                state.searched_queries.append(q)
                state.searched_query_set.add(q)
                single = _run_search_single(q, max_results)
                per_query.append(single)
                if single.get("ok"):
                    # Only keep non-empty titles so no blank note is recorded.
                    lead_titles = [
                        title
                        for title in (
                            r.get("title", "") for r in single.get("results", [])[:2]
                        )
                        if title
                    ]
                    if lead_titles:
                        state.trusted_notes.append(
                            f"Searched '{q}' and found leads: {', '.join(lead_titles)}"
                        )

            if not per_query:
                # Nothing searchable was supplied; tell the model explicitly.
                tool_response = {"ok": False, "error": "Search query cannot be empty."}
            elif len(per_query) == 1:
                tool_response = per_query[0]
            else:
                tool_response = {"ok": True, "queries": queries, "results": per_query}
        elif tool_name == "visit":
            raw_url = tool_args.get("url", "")
            urls: List[str]
            if isinstance(raw_url, list):
                urls = [str(u).strip() for u in raw_url if str(u).strip()]
            else:
                urls = [str(raw_url).strip()] if str(raw_url).strip() else []

            goal = str(tool_args.get("goal", "")).strip()
            max_chars = _coerce_int(tool_args.get("max_chars", 6000), 6000)
            max_chars = max(500, min(max_chars, 20000))

            per_url: List[Dict[str, Any]] = []
            for u in urls:
                if u in state.visited_url_set:
                    per_url.append({
                        "ok": True,
                        "url": u,
                        "cached": True,
                        "note": "Already visited; reusing cached result.",
                    })
                    continue
                state.visited_urls.append(u)
                state.visited_url_set.add(u)
                single = _run_visit_single(u, max_chars, goal)
                per_url.append(single)
                if single.get("ok"):
                    snippet = str(single.get("content", ""))[:180]
                    if snippet:
                        state.trusted_notes.append(
                            f"Visited {u} and extracted key context: {snippet}"
                        )

            if not per_url:
                # No usable URL was supplied; tell the model explicitly.
                tool_response = {"ok": False, "error": "URL cannot be empty."}
            elif len(per_url) == 1:
                tool_response = per_url[0]
            else:
                tool_response = {"ok": True, "goal": goal, "results": per_url}
        else:
            tool_response = {"ok": False, "error": f"Unknown tool: {tool_name}"}

        state.trace.append({"turn": turn, "tool": tool_name, "tool_response": tool_response})
        messages.append(
            {
                "role": "user",
                "content": TOOL_RESPONSE_TEMPLATE.format(
                    payload=json.dumps(tool_response, ensure_ascii=False)
                ),
            }
        )

    if final_answer is None:
        final_answer = (
            "I could not finish a complete research answer within the configured turns. "
            "Try increasing max turns or switching to a stronger model."
        )
    else:
        final_answer = ensure_markdown_table_blank_lines(final_answer)

    citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
    final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"
    if citations:
        final_answer = f"{final_answer}\n\n### Visited Sources\n{citations}"

    trace_text = json.dumps(
        {
            "used_model": used_model,
            "searched_queries": state.searched_queries,
            "visited_urls": state.visited_urls,
            "trusted_notes": state.trusted_notes[-10:],
            "trace": state.trace,
        },
        ensure_ascii=False,
        indent=2,
    )
    return final_answer, trace_text
def run_ui(
    question: str,
    model: str,
    max_turns: int,
    max_search_results: int,
    temperature: float,
):
    """Validate the request, then run the research agent.

    Returns an ``(answer_text, trace_json)`` pair. A blank question, a missing
    ``HF_TOKEN``, or selecting the Quest model without ``QUEST_BASE_URL`` each
    short-circuit with an explanatory message; any exception raised by the
    agent is reported as text instead of propagating.
    """

    def warn(message: str):
        # The same text is returned as the answer and, wrapped, as the JSON trace.
        return message, json.dumps({"error": message}, ensure_ascii=False, indent=2)

    if not question.strip():
        return "Please input a question.", "{}"

    if not os.getenv("HF_TOKEN"):
        return warn(
            "HF_TOKEN is not configured in Space Secrets. "
            "Go to Settings -> Secrets -> add `HF_TOKEN`, then retry."
        )

    if model == QUEST_MODEL_ID and not QUEST_BASE_URL:
        return warn(
            f"`{QUEST_MODEL_ID}` is private and not available via the free HF Inference API. "
            "Create a dedicated HF Inference Endpoint for it (https://ui.endpoints.huggingface.co/), "
            "then set `QUEST_BASE_URL` in Space Secrets to the endpoint's `/v1/` URL. "
            "In the meantime you can pick one of the open-weights models in the dropdown."
        )

    try:
        outcome = build_research_agent(
            question=question,
            model=model,
            max_turns=max_turns,
            max_search_results=max_search_results,
            temperature=temperature,
        )
    except Exception as exc:
        failure_json = json.dumps({"error": str(exc)}, ensure_ascii=False, indent=2)
        return f"Error: {exc}", failure_json
    return outcome
# Sample questions shown as one-click buttons in the UI. Each entry has a
# "category" (the kind of query it illustrates), an "icon" and the question
# "text" that is copied into the input box when the button is clicked.
# NOTE(review): the "icon" values look mis-encoded (probably emoji decoded
# with the wrong charset) — confirm the file's encoding before shipping.
EXAMPLES = [
    {
        "category": "Fixed facts",
        "icon": "๐ฏ",
        "text": "Who wrote the novel 1984, and when was it first published?",
    },
    {
        "category": "Time-varying",
        "icon": "๐",
        "text": "Who is the current CEO of Tesla, and what is the company's latest stock price?",
    },
    {
        "category": "Multi-constraints",
        "icon": "๐งฉ",
        "text": "Find a 2-day Tokyo itinerary under $250 focused on museums and vegetarian food.",
    },
    {
        "category": "Long-form research report",
        "icon": "๐",
        "text": "Write a short guide comparing electric cars vs hybrid cars for a daily commuter, covering cost, range, and maintenance.",
    },
]
def _example_label(ex: Dict[str, str]) -> str:
    """Build the button caption for one example: icon, category, then text."""
    icon, category, text = ex["icon"], ex["category"], ex["text"]
    return f"{icon} {category} โ {text}"
# Gradio UI definition: a logo banner, a two-column body (question, examples
# and results on one side; settings on the other) and the button wiring.
# NOTE(review): several gr.HTML(...) string literals below are split across
# two lines or empty in this copy — their markup may have been lost; verify
# against the original file. The nesting of the layout containers was
# reconstructed from the call order — confirm it matches the intended layout.
with gr.Blocks(
    title="Quest ยท Deep Research by OSU NLP",
    theme=APP_THEME,
    css=CUSTOM_CSS,
    fill_width=True,
) as demo:
    # --- Top banner with the Quest logo centered and the OSU NLP mark on the right ---
    with gr.Row(elem_classes="top-banner"):
        # Empty column on the first side of the centered logo.
        with gr.Column(scale=1, elem_classes="banner-side"):
            pass
        with gr.Column(scale=4, elem_classes="banner-center"):
            gr.Image(
                value=LOGO_PATH,
                show_label=False,
                container=False,
                interactive=False,
                show_download_button=False,
                show_fullscreen_button=False,
                show_share_button=False,
                elem_classes="banner-quest-logo",
            )
        with gr.Column(scale=1, elem_classes="banner-side banner-right"):
            gr.Image(
                value=OSU_NLP_LOGO_PATH,
                show_label=False,
                container=False,
                interactive=False,
                show_download_button=False,
                show_fullscreen_button=False,
                show_share_button=False,
                elem_classes="osu-nlp-logo",
            )
    # --- Main two-column layout ---
    with gr.Row(elem_classes="layout-gap"):
        with gr.Column(scale=6, min_width=420):
            # Question input and the Run / Stop / Clear actions.
            with gr.Group(elem_classes="section-card"):
                gr.HTML('What can I search for you?
')
                question = gr.Textbox(
                    show_label=False,
                    placeholder="Ask anything you want to research in depth...",
                    lines=6,
                )
                with gr.Row(elem_classes="action-row"):
                    run_btn = gr.Button("Run Research", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop", variant="stop", size="lg")
                    clear_btn = gr.Button("Clear", variant="secondary", size="lg")
            # One button per entry in EXAMPLES.
            with gr.Group(elem_classes="section-card"):
                gr.HTML('Try Examples
')
                gr.HTML(
                    'Each example shows the kind of query it represents. Click one to auto-fill.
'
                )
                with gr.Column(elem_classes="example-buttons"):
                    example_buttons = [
                        gr.Button(_example_label(ex), variant="secondary", elem_classes="example-btn")
                        for ex in EXAMPLES
                    ]
            # Output area: rendered answer and the raw JSON trace.
            with gr.Group(elem_classes="section-card"):
                with gr.Tabs():
                    with gr.TabItem("Result"):
                        answer = gr.Markdown(label="Final Answer")
                    with gr.TabItem("Record"):
                        trace = gr.Code(label="Execution Trace (JSON)", language="json")
        with gr.Column(scale=4, min_width=340, elem_classes="right-stack"):
            with gr.Group(elem_classes=["section-card", "no-frame"]):
                gr.HTML(
                    f"""
"""
                )
            # Run settings passed to run_ui.
            with gr.Group(elem_classes="section-card"):
                gr.HTML('Settings
')
                model = gr.Dropdown(
                    label="Model",
                    choices=DEFAULT_MODEL_CHOICES,
                    value=DEFAULT_MODEL if DEFAULT_MODEL in DEFAULT_MODEL_CHOICES else DEFAULT_MODEL_CHOICES[0],
                    allow_custom_value=True,
                )
                max_turns = gr.Slider(
                    label="Max Turns",
                    minimum=2,
                    maximum=20,
                    value=8,
                    step=1,
                )
                max_search_results = gr.Slider(
                    label="Search Results Per Query",
                    minimum=1,
                    maximum=10,
                    value=5,
                    step=1,
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.0,
                    maximum=1.5,
                    value=0.4,
                    step=0.1,
                )
    # Run button: call run_ui with the current inputs and show both outputs.
    run_event = run_btn.click(
        fn=run_ui,
        inputs=[question, model, max_turns, max_search_results, temperature],
        outputs=[answer, trace],
    )
    # Example buttons: copy the example text into the question box. The text
    # is bound as a lambda default so each button keeps its own example.
    for btn, ex in zip(example_buttons, EXAMPLES):
        btn.click(
            fn=(lambda text=ex["text"]: text),
            inputs=[],
            outputs=[question],
        )
    # Stop button: cancel the run started by the Run button.
    stop_btn.click(fn=None, cancels=[run_event])
    # Clear button: reset the question, the answer and the trace.
    clear_btn.click(
        fn=lambda: ("", "", "{}"),
        inputs=[],
        outputs=[question, answer, trace],
    )
# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()