Spaces:

osunlp
/

QUEST

Running

TomLii committed on Apr 20

Commit

1a201e4

1 Parent(s): 16c6fb1

Fix empty "..." Markdown output by reworking the <answer> fallback prompt

The "no tool call detected" fallback literally contained
`<answer>...</answer>`, so Quest-4B echoed the template verbatim and
extract_answer captured `...` as the final answer. Reword the prompt,
reject placeholder-only content (ASCII and unicode ellipses), strip
<think> blocks before parsing, handle truncated <answer>, guarantee a
blank line before pipe tables so GFM renders them, and raise the
generation budget from 1400 to 4096 tokens (env-configurable via
QUEST_MAX_NEW_TOKENS).

Made-with: Cursor

Files changed (1) hide show

app.py +108 -5

app.py CHANGED Viewed

@@ -805,13 +805,99 @@ class AgentState:
     trace: List[Dict[str, Any]] = field(default_factory=list)
 def extract_answer(text: str) -> Optional[str]:
-    match = re.search(r"<answer>\s*(.*?)\s*</answer>", text, flags=re.DOTALL | re.IGNORECASE)
-    return match.group(1).strip() if match else None
 def parse_tool_call(text: str) -> Tuple[Optional[str], Optional[Dict[str, Any]], Optional[str]]:
-    match = re.search(r"<tool_call>\s*(.*?)\s*</tool_call>", text, flags=re.DOTALL | re.IGNORECASE)
     if not match:
         return None, None, None
     payload = match.group(1).strip()
@@ -1000,7 +1086,7 @@ def build_research_agent(
             preferred_model=primary_model,
             candidate_models=fallback_models,
             temperature=temperature,
-            max_new_tokens=1400,
         )
         model_output = raw_output
         # Preserve the human-friendly model id for the trace even if the
@@ -1019,10 +1105,25 @@ def build_research_agent(
             tool_response = {"ok": False, "error": tool_err}
         elif not tool_name:
             # No explicit tool call and no final answer: force finalization.
             messages.append(
                 {
                     "role": "user",
-                    "content": "No tool call detected. Provide your best final answer in <answer>...</answer> now.",
                 }
             )
             continue
@@ -1117,6 +1218,8 @@ def build_research_agent(
             "I could not finish a complete research answer within the configured turns. "
             "Try increasing max turns or switching to a stronger model."
         )
     citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
     final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"

     trace: List[Dict[str, Any]] = field(default_factory=list)
# Match placeholder-only answers: content made up solely of whitespace, ASCII
# dots, unicode ellipses (U+2026) and interpuncts (U+00B7), in any number and
# order. The empty string matches as well. These show up when the model echoes
# a literal `<answer>...</answer>` template from the prompt instead of
# producing a real answer.
_PLACEHOLDER_ANSWER_RE = re.compile(r"^[\s.\u2026\u00b7]*$")

# Pipe-table delimiter row, e.g. `| --- | :---: |`. Each cell is one or more
# hyphens with optional alignment colons. The outer pipes are optional, but at
# least one pipe must be present so that a bare `---` (thematic break / setext
# underline) is never mistaken for a table. Single-column tables (`|---|`) are
# therefore recognised only when they carry a leading or trailing pipe.
_TABLE_SEPARATOR_RE = re.compile(
    r"""
    ^\s*
    (?:
        # Leading pipe: one or more cells, trailing pipe optional.
        \|\s*:?-+:?(?:\s*\|\s*:?-+:?)*\s*\|?
      |
        # No leading pipe: the first cell must be followed by a pipe.
        :?-+:?\s*\|(?:\s*:?-+:?\s*\|)*(?:\s*:?-+:?)?
    )
    \s*$
    """,
    re.VERBOSE,
)
_THINK_FLAGS = re.DOTALL | re.IGNORECASE
# A balanced reasoning block.
_THINK_BLOCK_RE = re.compile(r"<think>.*?</think>", _THINK_FLAGS)
# Reasoning whose opening tag is missing: everything up to the last `</think>`.
_THINK_ORPHAN_CLOSE_RE = re.compile(r"\A.*</think>", _THINK_FLAGS)
# Reasoning that was cut off before its closing tag: `<think>` to end of text.
_THINK_UNCLOSED_RE = re.compile(r"<think>.*\Z", _THINK_FLAGS)


def strip_think_blocks(text: str) -> str:
    """Remove `<think>` reasoning from raw model output.

    Quest-4B (Qwen3 family) emits `<think>` reasoning before the final
    answer. When the endpoint is deployed without a reasoning parser, the raw
    tags leak into chat completion `content`; stripping them here keeps the
    extracted answer clean for Markdown rendering.

    Three shapes are handled, in this order:

    1. Balanced `<think>...</think>` blocks are deleted.
    2. If a `</think>` remains without an opening tag, everything up to and
       including the last such tag is treated as reasoning and dropped.
    3. If a `<think>` remains without a closing tag (e.g. generation ran out
       of tokens mid-reasoning), everything from it to the end is dropped.

    Tag matching is case-insensitive. Text without any think tags is returned
    unchanged.

    Args:
        text: Raw model output; an empty or falsy value yields `""`.

    Returns:
        The text with all reasoning removed. Surrounding whitespace is left
        untouched.
    """
    if not text:
        return ""
    # Balanced pairs must go first so that steps 2 and 3 only ever see
    # genuinely unmatched tags.
    without_blocks = _THINK_BLOCK_RE.sub("", text)
    without_orphans = _THINK_ORPHAN_CLOSE_RE.sub("", without_blocks)
    return _THINK_UNCLOSED_RE.sub("", without_orphans)
def _is_placeholder_answer(text: str) -> bool:
    """Report whether *text* carries no real answer content.

    A falsy *text* is treated as the empty string. The decision is delegated
    to `_PLACEHOLDER_ANSWER_RE`, which matches strings made up only of
    whitespace, dots, unicode ellipses and interpuncts (including the empty
    string).
    """
    candidate = text if text else ""
    return _PLACEHOLDER_ANSWER_RE.match(candidate) is not None
# Opening/closing line of a fenced code block: up to three spaces of indent,
# a run of at least three backticks or tildes, then the rest of the line.
_CODE_FENCE_RE = re.compile(r"^ {0,3}(`{3,}|~{3,})(.*)$")


def ensure_markdown_table_blank_lines(text: str) -> str:
    """Insert a blank line before any pipe-table header row that lacks one.

    Models sometimes put a table header on the line directly below a
    sentence, with no paragraph break in between; depending on the Markdown
    renderer the header is then folded into that paragraph and the table
    shows up as raw text. This function separates the two defensively.

    A line counts as a header row when it contains `|` and the next line
    matches `_TABLE_SEPARATOR_RE`. A blank line is inserted only if the
    preceding output line exists and is not already blank. Only line-level
    gluing is repaired: a table that starts on the same line as prose is
    left as is.

    Lines inside fenced code blocks (backtick or tilde fences) are copied
    verbatim, so a table shown as literal code is never modified.

    Args:
        text: Markdown text, with lines separated by "\\n".

    Returns:
        The text with blank lines added where needed; every original line is
        preserved in order.
    """
    lines = text.split("\n")
    out: List[str] = []
    # Marker (e.g. "```") of the fence currently open, or None outside fences.
    open_fence: Optional[str] = None

    for idx, line in enumerate(lines):
        fence_match = _CODE_FENCE_RE.match(line)
        if fence_match is not None:
            marker, trailer = fence_match.group(1), fence_match.group(2)
            if open_fence is None:
                open_fence = marker
            elif (
                marker[0] == open_fence[0]
                and len(marker) >= len(open_fence)
                and not trailer.strip()
            ):
                # A closing fence uses the same character, is at least as
                # long as the opener, and carries no info string.
                open_fence = None
            out.append(line)
            continue

        if open_fence is not None:
            # Inside a code block: content must stay byte-for-byte intact.
            out.append(line)
            continue

        is_header = (
            "|" in line
            and idx + 1 < len(lines)
            and _TABLE_SEPARATOR_RE.match(lines[idx + 1]) is not None
        )
        if is_header and out and out[-1].strip() != "":
            out.append("")
        out.append(line)

    return "\n".join(out)
 def extract_answer(text: str) -> Optional[str]:
+    """Return the content of the first `<answer>...</answer>` block.
+    Tries two strategies, in order, and discards placeholder-only content
+    (bare ellipses) that the model sometimes echoes from the prompt:
+    1. Well-formed `<answer>...</answer>` block.
+    2. Truncated `<answer>...` with no closing tag (tokens ran out);
+       in that case we take everything after the opening tag.
+    """
+    cleaned = strip_think_blocks(text or "")
+    full_match = re.search(
+        r"<answer>\s*(.*?)\s*</answer>",
+        cleaned,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+    if full_match is not None:
+        candidate = full_match.group(1).strip()
+        if candidate and not _is_placeholder_answer(candidate):
+            return candidate
+        # Closed block was a placeholder / empty: fail fast. Do NOT fall
+        # through to the open-ended strategy, or it would re-match the same
+        # tag and incorrectly capture `...</answer>` as the answer.
+        return None
+    open_match = re.search(
+        r"<answer>\s*(.*)$", cleaned, flags=re.DOTALL | re.IGNORECASE
+    )
+    if open_match is not None:
+        candidate = open_match.group(1).strip()
+        if candidate and not _is_placeholder_answer(candidate):
+            return candidate
+    return None
 def parse_tool_call(text: str) -> Tuple[Optional[str], Optional[Dict[str, Any]], Optional[str]]:
+    cleaned = strip_think_blocks(text or "")
+    match = re.search(r"<tool_call>\s*(.*?)\s*</tool_call>", cleaned, flags=re.DOTALL | re.IGNORECASE)
     if not match:
         return None, None, None
     payload = match.group(1).strip()
             preferred_model=primary_model,
             candidate_models=fallback_models,
             temperature=temperature,
+            max_new_tokens=int(os.getenv("QUEST_MAX_NEW_TOKENS", "4096")),
         )
         model_output = raw_output
         # Preserve the human-friendly model id for the trace even if the
             tool_response = {"ok": False, "error": tool_err}
         elif not tool_name:
             # No explicit tool call and no final answer: force finalization.
+            # IMPORTANT: do not write the literal characters `<answer>...</answer>`
+            # here. Some models (notably the Qwen3 family that Quest-4B is
+            # built on) will echo the template verbatim, which means the
+            # extracted answer ends up being the three-dot placeholder `...`
+            # and the user sees an empty-looking result.
             messages.append(
                 {
                     "role": "user",
+                    "content": (
+                        "You did not call a tool and did not produce a final "
+                        "answer. Please now write your best final answer, "
+                        "wrapped between an opening <answer> tag and a "
+                        "closing </answer> tag. Put the real answer text "
+                        "between those tags; do not write a literal ellipsis "
+                        "or other placeholder. If the question asks for "
+                        "tabular data, use GitHub-Flavored Markdown pipe "
+                        "tables (`| col1 | col2 |` + `|---|---|`) and put a "
+                        "blank line before the first row so the table renders."
+                    ),
                 }
             )
             continue
             "I could not finish a complete research answer within the configured turns. "
             "Try increasing max turns or switching to a stronger model."
         )
+    else:
+        final_answer = ensure_markdown_table_blank_lines(final_answer)
     citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
     final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"