block."
name = data.get("name")
arguments = data.get("arguments", {})
if not isinstance(name, str) or not isinstance(arguments, dict):
return None, None, "Invalid tool format. Expect name(str) and arguments(dict)."
return name, arguments, None
def run_search(query: str, max_results: int = 5) -> Dict[str, Any]:
    """Run a text search through the ``DDGS`` client and return a result payload.

    Args:
        query: Search text. A blank or whitespace-only query is rejected.
        max_results: Maximum number of results requested from the backend.

    Returns:
        On success, ``{"ok": True, "query", "results", "cached"}`` where each
        result is a dict with ``title``, ``href`` and ``body`` strings.
        On failure, ``{"ok": False, "error": <message>}``.

    Successful payloads are memoised in ``SEARCH_CACHE`` under the
    lower-cased, stripped query plus ``max_results``; failures are never
    cached so a later retry can still succeed.
    """
    if not query.strip():
        return {"ok": False, "error": "Search query cannot be empty."}

    cache_key = f"{query.strip().lower()}::{max_results}"
    if cache_key in SEARCH_CACHE:
        # Copy the cached payload so flipping the flag does not mutate the cache entry.
        return {**SEARCH_CACHE[cache_key], "cached": True}

    try:
        with DDGS() as ddgs:
            rows: List[Dict[str, str]] = [
                {
                    # `or ""` also covers keys that are present but set to None.
                    "title": item.get("title") or "",
                    "href": item.get("href") or "",
                    "body": item.get("body") or "",
                }
                for item in ddgs.text(query, max_results=max_results) or []
            ]
    except Exception as exc:
        # Tool boundary: report backend failures in the same error shape the
        # other tool paths use instead of aborting the caller's whole run.
        return {"ok": False, "error": f"Search failed: {exc}"}

    payload = {"ok": True, "query": query, "results": rows, "cached": False}
    SEARCH_CACHE[cache_key] = payload
    return payload
def _clean_html_to_text(html: str, max_chars: int) -> str:
    """Reduce an HTML document to plain text, truncated to *max_chars* characters.

    ``script``, ``style`` and ``noscript`` elements are removed before the
    text is extracted, and every run of whitespace is collapsed to one space.
    """
    document = BeautifulSoup(html, "html.parser")

    # Drop elements whose contents are not readable page text.
    non_content_tags = ["script", "style", "noscript"]
    for element in document.find_all(non_content_tags):
        element.decompose()

    extracted = document.get_text(separator=" ", strip=True)
    collapsed = re.sub(r"\s+", " ", extracted)
    return collapsed[:max_chars]
def run_visit(url: str, max_chars: int = 6000) -> Dict[str, Any]:
if not url.strip():
return {"ok": False, "error": "URL cannot be empty."}
cache_key = f"{url.strip()}::{max_chars}"
if cache_key in VISIT_CACHE:
return {**VISIT_CACHE[cache_key], "cached": True}
try:
resp = requests.get(
url,
timeout=20,
headers={"User-Agent": "Mozilla/5.0 (compatible; DeepResearchSpace/1.0)"},
)
resp.raise_for_status()
content_type = resp.headers.get("content-type", "")
if "text/html" in content_type or " Tuple[str, str]:
model_order: List[str] = []
for m in [preferred_model] + candidate_models:
if m and m not in model_order:
model_order.append(m)
last_error = None
for model_name in model_order:
try:
completion = client.chat_completion(
model=model_name,
messages=messages,
temperature=temperature,
max_tokens=max_new_tokens,
)
return completion.choices[0].message.content or "", model_name
except Exception as exc:
last_error = exc
continue
raise RuntimeError(f"All model candidates failed. Last error: {last_error}")
def _coerce_int(value: Any, default: int) -> int:
    """Convert *value* to ``int``; fall back to ``int(default)`` if it cannot be converted."""
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        return int(default)


def _handle_search_call(
    state: "AgentState",
    tool_args: Dict[str, Any],
    default_max_results: int,
) -> Dict[str, Any]:
    """Execute one ``search`` tool request and record it in *state*."""
    query = str(tool_args.get("query", "")).strip()
    # Tool arguments come from model output, so a malformed number must not
    # abort the whole run; fall back to the configured default instead.
    max_results = _coerce_int(
        tool_args.get("max_results", default_max_results), default_max_results
    )
    max_results = max(1, min(max_results, 10))

    if query in state.searched_query_set:
        return {
            "ok": True,
            "query": query,
            "cached": True,
            "note": "This query was already searched. Reusing cached result to avoid duplicate work.",
            "results": [],
        }

    state.searched_queries.append(query)
    state.searched_query_set.add(query)
    tool_response = run_search(query=query, max_results=max_results)
    if tool_response.get("ok"):
        # Only non-empty titles are useful leads; skip the note when none exist.
        lead_titles = [
            title
            for title in (r.get("title", "") for r in tool_response.get("results", [])[:2])
            if title
        ]
        if lead_titles:
            state.trusted_notes.append(
                f"Searched '{query}' and found leads: {', '.join(lead_titles)}"
            )
    return tool_response


def _handle_visit_call(state: "AgentState", tool_args: Dict[str, Any]) -> Dict[str, Any]:
    """Execute one ``visit`` tool request and record it in *state*."""
    url = str(tool_args.get("url", "")).strip()
    # Same reasoning as for search: tolerate a malformed model-supplied number.
    max_chars = _coerce_int(tool_args.get("max_chars", 6000), 6000)
    max_chars = max(500, min(max_chars, 20000))

    if url in state.visited_url_set:
        return {
            "ok": True,
            "url": url,
            "cached": True,
            "note": "This URL was already visited. Reusing cached result to avoid duplicate work.",
        }

    state.visited_urls.append(url)
    state.visited_url_set.add(url)
    tool_response = run_visit(url=url, max_chars=max_chars)
    if tool_response.get("ok"):
        snippet = str(tool_response.get("content", ""))[:180]
        if snippet:
            state.trusted_notes.append(
                f"Visited {url} and extracted key context: {snippet}"
            )
    return tool_response


def build_research_agent(
    question: str,
    model: str,
    max_turns: int,
    max_search_results: int,
    temperature: float,
) -> Tuple[str, str]:
    """Run the tool-using research loop for *question*.

    Each turn asks the model for output, stops if ``extract_answer`` finds a
    final answer, and otherwise executes the requested ``search`` or ``visit``
    tool and feeds the JSON result back as a user message.

    Args:
        question: The user's research question.
        model: Preferred model id; the remaining ``DEFAULT_FREE_MODELS`` are
            passed to ``call_model`` as fallback candidates.
        max_turns: Maximum number of model turns.
        max_search_results: Default result count when a search call omits or
            garbles ``max_results``.
        temperature: Sampling temperature forwarded to ``call_model``.

    Returns:
        ``(final_answer, trace_text)``: the Markdown answer (prefixed with the
        model used and followed by the visited URLs) and a JSON dump of the
        run's queries, URLs, notes and per-turn trace.
    """
    token = os.getenv("HF_TOKEN")
    client = InferenceClient(token=token)
    state = AgentState()
    used_model = model
    recent_model_candidates = [m for m in DEFAULT_FREE_MODELS if m != model]
    messages: List[Dict[str, str]] = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": question},
    ]
    final_answer: Optional[str] = None

    for turn in range(1, max_turns + 1):
        # Every third turn, remind the model of what has already been learned.
        if state.trusted_notes and turn > 1 and turn % 3 == 0:
            summary_lines = "\n".join(f"- {n}" for n in state.trusted_notes[-6:])
            messages.append(
                {
                    "role": "user",
                    "content": f"RESEARCH STATE SUMMARY\n{summary_lines}\nUse this summary to avoid repeating work.",
                }
            )

        model_output, used_model = call_model(
            client=client,
            messages=messages,
            preferred_model=model,
            candidate_models=recent_model_candidates,
            temperature=temperature,
            max_new_tokens=1400,
        )
        messages.append({"role": "assistant", "content": model_output})
        state.trace.append({"turn": turn, "assistant": model_output})

        extracted_answer = extract_answer(model_output)
        if extracted_answer:
            final_answer = extracted_answer
            break

        tool_name, tool_args, tool_err = parse_tool_call(model_output)
        if tool_err:
            tool_response = {"ok": False, "error": tool_err}
        elif not tool_name:
            # No explicit tool call and no final answer: force finalization.
            messages.append(
                {
                    "role": "user",
                    "content": "No tool call detected. Provide your best final answer in ... now.",
                }
            )
            continue
        elif tool_name == "search":
            tool_response = _handle_search_call(state, tool_args, max_search_results)
        elif tool_name == "visit":
            tool_response = _handle_visit_call(state, tool_args)
        else:
            tool_response = {"ok": False, "error": f"Unknown tool: {tool_name}"}

        state.trace.append({"turn": turn, "tool": tool_name, "tool_response": tool_response})
        messages.append(
            {
                "role": "user",
                "content": TOOL_RESPONSE_TEMPLATE.format(
                    payload=json.dumps(tool_response, ensure_ascii=False)
                ),
            }
        )

    if final_answer is None:
        final_answer = (
            "I could not finish a complete research answer within the configured turns. "
            "Try increasing max turns or switching to a stronger model."
        )

    # A visit call without a URL records "", which would render as a blank bullet.
    citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)) if url)
    final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"
    if citations:
        final_answer = f"{final_answer}\n\n### Visited Sources\n{citations}"

    trace_text = json.dumps(
        {
            "used_model": used_model,
            "searched_queries": state.searched_queries,
            "visited_urls": state.visited_urls,
            "trusted_notes": state.trusted_notes[-10:],
            "trace": state.trace,
        },
        ensure_ascii=False,
        indent=2,
    )
    return final_answer, trace_text
def run_ui(
    question: str,
    model: str,
    max_turns: int,
    max_search_results: int,
    temperature: float,
):
    """UI callback: validate the inputs, run the research agent, return two strings.

    Returns an ``(answer_text, trace_json)`` pair. A blank question or a
    missing ``HF_TOKEN`` produces a message instead of a run, and any
    exception raised by the agent is reported as text rather than propagated.
    """

    def _error_json(message: str) -> str:
        # Shape shared by every error reported in the trace output.
        return json.dumps({"error": message}, ensure_ascii=False, indent=2)

    stripped_question = question.strip()
    if not stripped_question:
        return "Please input a question.", "{}"

    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        notice = (
            "HF_TOKEN is not configured in Space Secrets. "
            "Go to Settings -> Secrets -> add `HF_TOKEN`, then retry."
        )
        return notice, _error_json(notice)

    agent_kwargs = {
        "question": question,
        "model": model,
        "max_turns": max_turns,
        "max_search_results": max_search_results,
        "temperature": temperature,
    }
    try:
        outcome = build_research_agent(**agent_kwargs)
    except Exception as failure:
        # UI boundary: show the failure to the user instead of crashing the handler.
        return f"Error: {failure}", _error_json(str(failure))
    return outcome
# UI definition: a `gr.Blocks` app bound to `demo`, styled with CUSTOM_CSS.
# NOTE(review): indentation is missing throughout this section and several
# gr.HTML(...) string literals below are split across two lines inside single
# quotes, which is not valid Python. The original markup appears to have been
# lost; restore it from the authoritative copy of this file.
with gr.Blocks(
title="DeepResearch Space Starter",
theme=gr.themes.Default(
text_size="md",
radius_size="md",
spacing_size="md",
),
css=CUSTOM_CSS,
) as demo:
with gr.Row(elem_classes="layout-gap"):
# Wider column (scale=7): question input, example prompts, and result tabs.
with gr.Column(scale=7):
with gr.Group(elem_classes="section-card"):
# NOTE(review): literal broken across lines — original HTML heading presumably stripped.
gr.HTML('Chat
')
question = gr.Textbox(
show_label=False,
placeholder="Ask anything you want to research...",
lines=6,
)
with gr.Row():
run_btn = gr.Button("Run Research", variant="primary", size="lg")
stop_btn = gr.Button("Stop", variant="stop", size="lg")
clear_btn = gr.Button("Clear", variant="secondary", size="lg")
with gr.Group(elem_classes="section-card"):
# NOTE(review): the next two literals are broken across lines in the same way.
gr.HTML('Try Examples
')
gr.HTML('Click to auto-fill the chat box
')
with gr.Column(elem_classes="example-buttons"):
# Button labels are short teasers; the full prompts are set in the click handlers below.
ex1_btn = gr.Button("🌏 Plan a 2-day Tokyo trip under $250 with trade-offs", variant="secondary")
ex2_btn = gr.Button("🤖 Compare 3 open-source coding agents with pros/cons", variant="secondary")
ex3_btn = gr.Button("⚖️ RAG vs fine-tuning for legal QA: which and why?", variant="secondary")
with gr.Group(elem_classes="section-card"):
with gr.Tabs():
with gr.TabItem("Result"):
answer = gr.Markdown(label="Final Answer")
with gr.TabItem("Record"):
trace = gr.Code(label="Execution Trace (JSON)", language="json")
# Narrower column (scale=3): logo, an HTML panel, and the run settings.
with gr.Column(scale=3, elem_classes="right-stack"):
with gr.Group(elem_classes=["section-card", "no-frame"]):
gr.Image(
value=LOGO_PATH,
show_label=False,
container=False,
interactive=False,
show_download_button=False,
show_fullscreen_button=False,
elem_classes="banner-logo-image",
)
with gr.Group(elem_classes=["section-card", "no-frame"]):
# NOTE(review): this f-string has no content or placeholders — its markup was presumably lost too.
gr.HTML(
f"""
"""
)
with gr.Group(elem_classes="section-card"):
# NOTE(review): literal broken across lines, as above.
gr.HTML('Settings
')
# Falls back to the first listed model when DEFAULT_MODEL is not among the choices.
model = gr.Dropdown(
label="Model",
choices=DEFAULT_FREE_MODELS,
value=DEFAULT_MODEL if DEFAULT_MODEL in DEFAULT_FREE_MODELS else DEFAULT_FREE_MODELS[0],
allow_custom_value=True,
info="You can type any model id supported by HF Inference API.",
)
max_turns = gr.Slider(label="Max Turns", minimum=2, maximum=20, value=8, step=1, elem_classes="slider-root")
max_search_results = gr.Slider(
label="Search Results Per Query", minimum=1, maximum=10, value=5, step=1
, elem_classes="slider-root"
)
temperature = gr.Slider(
label="Temperature", minimum=0.0, maximum=1.5, value=0.4, step=0.1, elem_classes="slider-root"
)
# Event wiring. The run event handle is kept so the Stop button can reference it.
run_event = run_btn.click(
fn=run_ui,
inputs=[question, model, max_turns, max_search_results, temperature],
outputs=[answer, trace],
)
# Each example button writes a fixed prompt into the question box.
ex1_btn.click(
fn=lambda: "Plan a 2-day food + museum itinerary in Tokyo under $250, with trade-offs.",
inputs=[],
outputs=[question],
)
ex2_btn.click(
fn=lambda: "Find 3 open-source coding agents and compare real strengths/limitations.",
inputs=[],
outputs=[question],
)
ex3_btn.click(
fn=lambda: "Explain whether RAG or fine-tuning is better for a legal QA assistant, and why.",
inputs=[],
outputs=[question],
)
# Stop runs no function of its own; it only lists the run event under `cancels`.
stop_btn.click(fn=None, cancels=[run_event])
# Clear resets the question and answer to empty strings and the trace to "{}".
clear_btn.click(
fn=lambda: ("", "", "{}"),
inputs=[],
outputs=[question, answer, trace],
)
# Script entry point: launch the app only when this file is executed directly.
if __name__ == "__main__":
demo.launch()