TomLii committed on
Commit
1a201e4
·
1 Parent(s): 16c6fb1

Fix empty "..." Markdown output by reworking the <answer> fallback prompt

Browse files

The "no tool call detected" fallback literally contained
`<answer>...</answer>`, so Quest-4B echoed the template verbatim and
extract_answer captured `...` as the final answer. Reword the prompt,
reject placeholder-only content (ASCII and unicode ellipses), strip
<think> blocks before parsing, handle truncated <answer>, guarantee a
blank line before pipe tables so GFM renders them, and raise the
generation budget from 1400 to 4096 tokens (env-configurable via
QUEST_MAX_NEW_TOKENS).

Made-with: Cursor

Files changed (1) hide show
  1. app.py +108 -5
app.py CHANGED
@@ -805,13 +805,99 @@ class AgentState:
805
  trace: List[Dict[str, Any]] = field(default_factory=list)
806
 
807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
808
  def extract_answer(text: str) -> Optional[str]:
809
- match = re.search(r"<answer>\s*(.*?)\s*</answer>", text, flags=re.DOTALL | re.IGNORECASE)
810
- return match.group(1).strip() if match else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
811
 
812
 
813
  def parse_tool_call(text: str) -> Tuple[Optional[str], Optional[Dict[str, Any]], Optional[str]]:
814
- match = re.search(r"<tool_call>\s*(.*?)\s*</tool_call>", text, flags=re.DOTALL | re.IGNORECASE)
 
815
  if not match:
816
  return None, None, None
817
  payload = match.group(1).strip()
@@ -1000,7 +1086,7 @@ def build_research_agent(
1000
  preferred_model=primary_model,
1001
  candidate_models=fallback_models,
1002
  temperature=temperature,
1003
- max_new_tokens=1400,
1004
  )
1005
  model_output = raw_output
1006
  # Preserve the human-friendly model id for the trace even if the
@@ -1019,10 +1105,25 @@ def build_research_agent(
1019
  tool_response = {"ok": False, "error": tool_err}
1020
  elif not tool_name:
1021
  # No explicit tool call and no final answer: force finalization.
 
 
 
 
 
1022
  messages.append(
1023
  {
1024
  "role": "user",
1025
- "content": "No tool call detected. Provide your best final answer in <answer>...</answer> now.",
 
 
 
 
 
 
 
 
 
 
1026
  }
1027
  )
1028
  continue
@@ -1117,6 +1218,8 @@ def build_research_agent(
1117
  "I could not finish a complete research answer within the configured turns. "
1118
  "Try increasing max turns or switching to a stronger model."
1119
  )
 
 
1120
 
1121
  citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
1122
  final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"
 
805
  trace: List[Dict[str, Any]] = field(default_factory=list)
806
 
807
 
808
# Matches placeholder-only answers: the whole string consists solely of
# whitespace, ASCII dots (so a bare `...`), the unicode ellipsis `…` (U+2026)
# and/or the interpunct `·` (U+00B7), in any mix and any length. The empty
# string matches as well. Such content shows up when the model echoes a
# literal `<answer>...</answer>` template from the prompt instead of producing
# a real answer.
_PLACEHOLDER_ANSWER_RE = re.compile(r"^[\s.\u2026\u00b7]*$")

# Pipe-table separator line, e.g. `| --- | :---: |`. The outer pipes are
# optional in some GFM dialects, so we accept both. Each cell may carry an
# optional leading and/or trailing `:` alignment marker.
# NOTE(review): the pattern requires at least two columns and at least two
# hyphens per cell; GFM itself also appears to allow single-hyphen cells
# (`:-:`) and single-column tables, which this will not recognise -- confirm
# whether that matters for the models in use.
_TABLE_SEPARATOR_RE = re.compile(
    r"^\s*\|?\s*:?-{2,}:?(?:\s*\|\s*:?-{2,}:?)+\s*\|?\s*$"
)
819
+
820
+
821
def strip_think_blocks(text: str) -> str:
    """Remove `<think>` reasoning from raw model output.

    Quest-4B (Qwen3 family) emits `<think>` reasoning before the final
    answer. When the endpoint is deployed without a reasoning parser, the raw
    tags leak into chat completion `content`; stripping them here keeps the
    extracted answer clean for Markdown rendering.

    Three shapes are handled, in this order (tags are matched
    case-insensitively and may span multiple lines):

    1. Well-formed `<think>...</think>` blocks are removed wherever they occur.
    2. An orphan `</think>` with no opening tag: everything up to and
       including the last such tag is dropped. This presumably happens when
       the chat template already placed the opening tag in the prompt, so the
       completion only contains the closing one.
    3. An unclosed `<think>`: everything from the tag to the end of the text
       is dropped, since the generation was cut off while still reasoning.

    Args:
        text: Raw model output.

    Returns:
        The text with all reasoning removed. Surrounding whitespace is left
        untouched.
    """
    flags = re.DOTALL | re.IGNORECASE

    # 1. Closed blocks. Non-greedy so that text between two separate blocks
    #    survives.
    cleaned = re.sub(r"<think>.*?</think>", "", text, flags=flags)

    # 2. Any `</think>` still present has no opening partner (step 1 would
    #    have consumed it otherwise), so the text before it is reasoning.
    cleaned = re.sub(r"^.*</think>", "", cleaned, flags=flags)

    # 3. Any `<think>` still present was never closed: the tail is reasoning.
    cleaned = re.sub(r"<think>.*$", "", cleaned, flags=flags)

    return cleaned
832
+
833
+
834
def _is_placeholder_answer(text: str) -> bool:
    """Report whether *text* is empty or matches the placeholder pattern.

    A falsy value (``None`` or ``""``) is treated as the empty string before
    it is tested against ``_PLACEHOLDER_ANSWER_RE``.
    """
    candidate = text if text else ""
    return _PLACEHOLDER_ANSWER_RE.match(candidate) is not None
836
+
837
+
838
def ensure_markdown_table_blank_lines(text: str) -> str:
    """Insert a blank line before any pipe-table header row.

    GitHub-Flavored Markdown requires a pipe table to be preceded by a
    paragraph break; otherwise the header row is folded into the previous
    paragraph and the whole table renders as raw text. Models sometimes glue
    the table directly under a sentence, so we fix that up defensively.

    A line counts as a header row when it contains a `|` and the line right
    after it matches ``_TABLE_SEPARATOR_RE``. Lines inside fenced code blocks
    (``` or ~~~) are copied through untouched, so code samples that happen to
    contain table-like text are not altered.

    Args:
        text: Markdown text, split on ``"\\n"``.

    Returns:
        The same text with an empty line inserted before each detected header
        row that is not already preceded by a blank line or the start of the
        text.
    """
    lines = text.split("\n")
    out: List[str] = []
    # (fence character, fence length) of the currently open fenced code
    # block, or None when outside of one.
    open_fence: Optional[Tuple[str, int]] = None

    for idx, line in enumerate(lines):
        stripped = line.strip()
        fence_char = stripped[:1]
        fence_len = 0
        if fence_char in ("`", "~"):
            fence_len = len(stripped) - len(stripped.lstrip(fence_char))

        if open_fence is not None:
            # A fence closes on a line made only of the same character,
            # at least as long as the opening run.
            if (
                fence_char == open_fence[0]
                and fence_len >= open_fence[1]
                and len(stripped) == fence_len
            ):
                open_fence = None
            out.append(line)
            continue

        if fence_len >= 3:
            # Opening fence (an info string may follow the run).
            open_fence = (fence_char, fence_len)
            out.append(line)
            continue

        is_header = (
            "|" in line
            and idx + 1 < len(lines)
            and _TABLE_SEPARATOR_RE.match(lines[idx + 1]) is not None
        )
        # Only add the break when the previous emitted line is non-blank;
        # nothing is added at the very start of the text.
        if is_header and out and out[-1].strip() != "":
            out.append("")
        out.append(line)

    return "\n".join(out)
859
+
860
+
861
def extract_answer(text: str) -> Optional[str]:
    """Return the first real (non-placeholder) `<answer>` payload in *text*.

    `<think>` reasoning is stripped first. Two strategies are then tried, in
    order, and placeholder-only content (bare ellipses, whitespace) that the
    model sometimes echoes from the prompt is discarded:

    1. Well-formed `<answer>...</answer>` blocks, scanned left to right. A
       placeholder-only block is skipped rather than ending the search, so an
       echoed template followed by a real answer still yields the answer.
    2. Truncated `<answer>...` with no closing tag (tokens ran out); in that
       case everything after the opening tag is taken. This search starts
       after the last rejected closed block, so it can never re-match that
       block and capture `...</answer>` as the answer.

    Args:
        text: Raw model output; ``None`` or empty is treated as ``""``.

    Returns:
        The stripped answer text, or ``None`` when no usable answer is found.
    """
    cleaned = strip_think_blocks(text or "")
    flags = re.DOTALL | re.IGNORECASE

    # Strategy 1: closed blocks. Remember where the last rejected block ends
    # so the open-ended fallback only looks at the text after it.
    scan_from = 0
    for block in re.finditer(
        r"<answer>\s*(.*?)\s*</answer>", cleaned, flags=flags
    ):
        candidate = block.group(1).strip()
        if candidate and not _is_placeholder_answer(candidate):
            return candidate
        scan_from = block.end()

    # Strategy 2: opening tag without a closing tag (truncated generation).
    open_match = re.search(
        r"<answer>\s*(.*)$", cleaned[scan_from:], flags=flags
    )
    if open_match is not None:
        candidate = open_match.group(1).strip()
        if candidate and not _is_placeholder_answer(candidate):
            return candidate

    return None
896
 
897
 
898
  def parse_tool_call(text: str) -> Tuple[Optional[str], Optional[Dict[str, Any]], Optional[str]]:
899
+ cleaned = strip_think_blocks(text or "")
900
+ match = re.search(r"<tool_call>\s*(.*?)\s*</tool_call>", cleaned, flags=re.DOTALL | re.IGNORECASE)
901
  if not match:
902
  return None, None, None
903
  payload = match.group(1).strip()
 
1086
  preferred_model=primary_model,
1087
  candidate_models=fallback_models,
1088
  temperature=temperature,
1089
+ max_new_tokens=int(os.getenv("QUEST_MAX_NEW_TOKENS", "4096")),
1090
  )
1091
  model_output = raw_output
1092
  # Preserve the human-friendly model id for the trace even if the
 
1105
  tool_response = {"ok": False, "error": tool_err}
1106
  elif not tool_name:
1107
  # No explicit tool call and no final answer: force finalization.
1108
+ # IMPORTANT: do not write the literal characters `<answer>...</answer>`
1109
+ # here. Some models (notably the Qwen3 family that Quest-4B is
1110
+ # built on) will echo the template verbatim, which means the
1111
+ # extracted answer ends up being the three-dot placeholder `...`
1112
+ # and the user sees an empty-looking result.
1113
  messages.append(
1114
  {
1115
  "role": "user",
1116
+ "content": (
1117
+ "You did not call a tool and did not produce a final "
1118
+ "answer. Please now write your best final answer, "
1119
+ "wrapped between an opening <answer> tag and a "
1120
+ "closing </answer> tag. Put the real answer text "
1121
+ "between those tags; do not write a literal ellipsis "
1122
+ "or other placeholder. If the question asks for "
1123
+ "tabular data, use GitHub-Flavored Markdown pipe "
1124
+ "tables (`| col1 | col2 |` + `|---|---|`) and put a "
1125
+ "blank line before the first row so the table renders."
1126
+ ),
1127
  }
1128
  )
1129
  continue
 
1218
  "I could not finish a complete research answer within the configured turns. "
1219
  "Try increasing max turns or switching to a stronger model."
1220
  )
1221
+ else:
1222
+ final_answer = ensure_markdown_table_blank_lines(final_answer)
1223
 
1224
  citations = "\n".join(f"- {url}" for url in sorted(set(state.visited_urls)))
1225
  final_answer = f"**Model used:** `{used_model}`\n\n{final_answer}"