Z User committed on
Commit
ba04984
·
1 Parent(s): e10e850

fix: LaTeX symbols leaking to user chat (\rightarrow etc.)

Browse files

PROBLEM: gemma-4-31b-it outputs LaTeX math expressions like
\rightarrow, \Rightarrow, \times, \alpha, \sum, etc. in responses.
These raw LaTeX commands show up as literal text on WeChat/Feishu
instead of being rendered as Unicode symbols.

FIX: Extended patch_strip_thinking_tags.py to also convert common
LaTeX math/logic symbols to their Unicode equivalents before the
response is sent to users.

Coverage: ~80 symbols including:
- Arrows: \rightarrow -> →, \Rightarrow -> ⇒, \to -> →, etc.
- Math: \times -> ×, \leq -> ≤, \geq -> ≥, \neq -> ≠, etc.
- Greek: \alpha -> α, \beta -> β, \pi -> π, etc.
- Logic: \forall -> ∀, \exists -> ∃, \in -> ∈, etc.
- Misc: \cdots -> ⋯, \sqrt -> √, \angle -> ∠, etc.

Also improved idempotency: skip patches already applied
to avoid duplicate insertions on repeated runs.

Files changed (1) hide show
  1. scripts/patch_strip_thinking_tags.py +162 -21
scripts/patch_strip_thinking_tags.py CHANGED
@@ -1,17 +1,19 @@
1
  #!/usr/bin/env python3
2
- """Patch hermes-agent to strip <|channel>thought / <channel|> thinking tags.
3
 
4
- PROBLEM: Some models output internal thinking tokens in the format
5
  <|channel>thought ... <channel|>
6
- that leak into user-facing messages on WeChat/Feishu. The existing
7
- _strip_think_blocks() in run_agent.py handles <thought>, <thinking>,
8
- <reasoning> etc., but NOT this <|...> pipe-delimited variant.
 
 
9
 
10
  FIX: Extend both the agent-level tag stripper AND the gateway stream
11
- consumer to recognise and suppress these tags.
12
 
13
  Files patched:
14
- 1. run_agent.py — _strip_think_blocks() + stray-tag cleanup
15
  2. gateway/stream_consumer.py — _OPEN_THINK_TAGS / _CLOSE_THINK_TAGS
16
  """
17
 
@@ -21,22 +23,113 @@ import glob
21
  import re
22
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  def patch_run_agent(filepath: str) -> bool:
25
- """Add <|channel>thinking / <channel|> patterns to _strip_think_blocks."""
26
  with open(filepath, "r") as f:
27
  content = f.read()
28
 
29
  if "<|channel" in content and "_strip_think_blocks" in content:
30
  # Check if already patched by looking for our specific addition
31
- if '<|channel>thought' in content or '<|' in content and 'hermes-bot patch' in content:
32
- print(f" run_agent.py already patched, skipping")
33
- return True
34
-
35
  applied = False
36
 
37
  # ── Patch 1a: Add closed-tag regex for <|channel>thought ... <channel|> ──
38
- # Find the block of re.sub calls for closed tag pairs.
39
- # We add our pattern right after the <thought>.*?</thought> line.
40
  old_thought = "content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)"
41
  new_thought = (
42
  old_thought
@@ -45,13 +138,12 @@ def patch_run_agent(filepath: str) -> bool:
45
  + "\n content = re.sub(r'<\\|channel\\|>thought.*?<\\|channel\\|>', '', content, flags=re.DOTALL | re.IGNORECASE)"
46
  + "\n content = re.sub(r'<\\|[^|>]*\\|>.*?<\\|/[^|>]*\\|>', '', content, flags=re.DOTALL | re.IGNORECASE)"
47
  )
48
- if old_thought in content:
49
  content = content.replace(old_thought, new_thought, 1)
50
  applied = True
51
  print(" [run_agent.py] Added closed-tag regex for <|...> thinking blocks")
52
 
53
  # ── Patch 1b: Add unterminated open-tag pattern ──
54
- # Find the unterminated block regex and extend it.
55
  old_unterm = (
56
  "r'(?:^|\\n)[ \\t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\\b[^>]*>.*$'"
57
  )
@@ -59,13 +151,12 @@ def patch_run_agent(filepath: str) -> bool:
59
  "r'(?:^|\\n)[ \\t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\\b[^>]*>.*$'"
60
  + "\n + r'|(?:^|\\n)[ \\t]*<\\|[^|>]*>.*$'" # <|channel>thought
61
  )
62
- if old_unterm in content and not "<\\|[^|>]*>" in content.split("def _has_natural_response_ending")[0].split("def _strip_think_blocks")[1]:
63
  content = content.replace(old_unterm, new_unterm, 1)
64
  applied = True
65
  print(" [run_agent.py] Extended unterminated-tag regex for <|...> variants")
66
 
67
  # ── Patch 1c: Add stray orphan tag cleanup ──
68
- # Find the stray tag cleanup and extend it.
69
  old_stray = (
70
  "r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\\s*'"
71
  )
@@ -73,16 +164,63 @@ def patch_run_agent(filepath: str) -> bool:
73
  "r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\\s*'"
74
  + "\n + r'|<\\|[^|>]*\\|>\\s*'" # stray <|channel|> or <channel|>
75
  )
76
- if old_stray in content:
77
  content = content.replace(old_stray, new_stray, 1)
78
  applied = True
79
  print(" [run_agent.py] Extended stray-tag cleanup for <|...> variants")
80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  if applied:
82
  with open(filepath, "w") as f:
83
  f.write(content)
84
  return True
85
  else:
 
 
 
 
86
  print(f" WARNING: Could not patch {filepath}", file=sys.stderr)
87
  return False
88
 
@@ -98,7 +236,7 @@ def patch_stream_consumer(filepath: str) -> bool:
98
  old_open = ' _OPEN_THINK_TAGS = (\n "<REASONING_SCRATCHPAD>", "\U0001f9ae", "<reasoning>",\n "<THINKING>", "<thinking>", "<thought>",\n )'
99
  new_open = ' _OPEN_THINK_TAGS = (\n "<REASONING_SCRATCHPAD>", "\U0001f9ae", "<reasoning>",\n "<THINKING>", "<thinking>", "<thought>",\n # Hermes Bot patch: <|...> thinking variants\n "<|channel>thought", "<|channel|>",\n )'
100
 
101
- if old_open in content:
102
  content = content.replace(old_open, new_open, 1)
103
  applied = True
104
  print(" [stream_consumer.py] Added <|channel>thought to _OPEN_THINK_TAGS")
@@ -107,7 +245,7 @@ def patch_stream_consumer(filepath: str) -> bool:
107
  old_close = ' _CLOSE_THINK_TAGS = (\n "</REASONING_SCRATCHPAD>", "\U0001f9d0", "</reasoning>",\n "</THINKING>", "</thinking>", "</thought>",\n )'
108
  new_close = ' _CLOSE_THINK_TAGS = (\n "</REASONING_SCRATCHPAD>", "\U0001f9d0", "</reasoning>",\n "</THINKING>", "</thinking>", "</thought>",\n # Hermes Bot patch: <|...> thinking variants\n "<channel|>",\n )'
109
 
110
- if old_close in content:
111
  content = content.replace(old_close, new_close, 1)
112
  applied = True
113
  print(" [stream_consumer.py] Added <channel|> to _CLOSE_THINK_TAGS")
@@ -117,6 +255,9 @@ def patch_stream_consumer(filepath: str) -> bool:
117
  f.write(content)
118
  return True
119
  else:
 
 
 
120
  print(f" WARNING: Could not patch {filepath}", file=sys.stderr)
121
  return False
122
 
 
1
  #!/usr/bin/env python3
2
+ """Patch hermes-agent to strip thinking tags AND LaTeX symbols from user output.
3
 
4
+ PROBLEM 1: Some models output internal thinking tokens in the format
5
  <|channel>thought ... <channel|>
6
+ that leak into user-facing messages on WeChat/Feishu.
7
+
8
+ PROBLEM 2: Some models output LaTeX math expressions like \rightarrow,
9
+ \leftarrow, \Rightarrow, \times, etc. that should be rendered as Unicode
10
+ arrows/symbols but instead show up as raw \command text.
11
 
12
  FIX: Extend both the agent-level tag stripper AND the gateway stream
13
+ consumer, plus add LaTeX -> Unicode conversion.
14
 
15
  Files patched:
16
+ 1. run_agent.py — _strip_think_blocks() + stray-tag cleanup + LaTeX cleanup
17
  2. gateway/stream_consumer.py — _OPEN_THINK_TAGS / _CLOSE_THINK_TAGS
18
  """
19
 
 
23
  import re
24
 
25
 
26
# LaTeX -> Unicode mapping for common math/logic symbols.
#
# Each entry is (regex_pattern, replacement). The pattern is a raw string in
# which ``\\`` is the regex escape for ONE literal backslash, so
# r'\\rightarrow' matches the text ``\rightarrow`` exactly as a model emits it.
# Entry order does not matter: the generated code refuses to match a command
# that is merely a prefix of a longer one (see _build_latex_replacements).
LATEX_TO_UNICODE = [
    # Arrows
    (r'\\rightarrow', '\u2192'),       # →
    (r'\\leftarrow', '\u2190'),        # ←
    (r'\\Rightarrow', '\u21D2'),       # ⇒
    (r'\\Leftarrow', '\u21D0'),        # ⇐
    (r'\\leftrightarrow', '\u2194'),   # ↔
    (r'\\Leftrightarrow', '\u21D4'),   # ⇔
    (r'\\mapsto', '\u21A6'),           # ↦
    (r'\\uparrow', '\u2191'),          # ↑
    (r'\\downarrow', '\u2193'),        # ↓
    (r'\\Uparrow', '\u21D1'),          # ⇑
    (r'\\Downarrow', '\u21D3'),        # ⇓
    # Math operators
    (r'\\times', '\u00D7'),            # ×
    (r'\\div', '\u00F7'),              # ÷
    (r'\\pm', '\u00B1'),               # ±
    (r'\\leq', '\u2264'),              # ≤
    (r'\\geq', '\u2265'),              # ≥
    (r'\\neq', '\u2260'),              # ≠
    (r'\\approx', '\u2248'),           # ≈
    (r'\\equiv', '\u2261'),            # ≡
    (r'\\sim', '\u223C'),              # ∼
    (r'\\simeq', '\u2243'),            # ≃
    (r'\\propto', '\u221D'),           # ∝
    (r'\\infty', '\u221E'),            # ∞
    (r'\\partial', '\u2202'),          # ∂
    (r'\\nabla', '\u2207'),            # ∇
    (r'\\forall', '\u2200'),           # ∀
    (r'\\exists', '\u2203'),           # ∃
    (r'\\in', '\u2208'),               # ∈
    (r'\\notin', '\u2209'),            # ∉
    (r'\\subset', '\u2282'),           # ⊂
    (r'\\supset', '\u2283'),           # ⊃
    (r'\\cup', '\u222A'),              # ∪
    (r'\\cap', '\u2229'),              # ∩
    (r'\\emptyset', '\u2205'),         # ∅
    (r'\\sum', '\u2211'),              # ∑
    (r'\\prod', '\u220F'),             # ∏
    (r'\\int', '\u222B'),              # ∫
    (r'\\sqrt', '\u221A'),             # √
    (r'\\angle', '\u2220'),            # ∠
    (r'\\perp', '\u22A5'),             # ⊥
    (r'\\parallel', '\u2225'),         # ∥
    (r'\\cong', '\u2245'),             # ≅
    (r'\\to', '\u2192'),               # →
    (r'\\gets', '\u2190'),             # ←
    # Greek letters (common)
    (r'\\alpha', '\u03B1'),            # α
    (r'\\beta', '\u03B2'),             # β
    (r'\\gamma', '\u03B3'),            # γ
    (r'\\delta', '\u03B4'),            # δ
    (r'\\epsilon', '\u03B5'),          # ε
    (r'\\zeta', '\u03B6'),             # ζ
    (r'\\eta', '\u03B7'),              # η
    (r'\\theta', '\u03B8'),            # θ
    (r'\\lambda', '\u03BB'),           # λ
    (r'\\mu', '\u03BC'),               # μ
    (r'\\sigma', '\u03C3'),            # σ
    (r'\\omega', '\u03C9'),            # ω
    (r'\\pi', '\u03C0'),               # π
    (r'\\phi', '\u03C6'),              # φ
    (r'\\psi', '\u03C8'),              # ψ
    (r'\\rho', '\u03C1'),              # ρ
    (r'\\tau', '\u03C4'),              # τ
    # Special (duplicate \Rightarrow and \times entries removed; both are
    # already listed above)
    (r'\\cdots', '\u22EF'),            # ⋯
    (r'\\ldots', '\u2026'),            # …
    (r'\\dots', '\u2026'),             # …
    (r'\\iff', '\u21D4'),              # ⇔
    (r'\\implies', '\u21D2'),          # ⇒
    (r'\\therefore', '\u2234'),        # ∴
    (r'\\because', '\u2235'),          # ∵
    (r'\\checkmark', '\u2713'),        # ✓
    (r'\\deg', '\u00B0'),              # °
    (r'\\cdot', '\u00B7'),             # ·
]


def _build_latex_replacements(indent: str = " " * 8) -> str:
    """Build the source lines that convert LaTeX commands to Unicode.

    Returns one ``content = re.sub(...)`` statement per LATEX_TO_UNICODE
    entry, joined with newlines, ready to be spliced into the patched
    function's body.

    Args:
        indent: Leading whitespace put in front of every generated statement.
            NOTE(review): the default of 8 spaces assumes the statements are
            inserted into a method body; confirm against the indentation of
            the ``return content`` line in the target run_agent.py.

    Returns:
        The generated Python source as a single string (no trailing newline).
    """
    lines = []
    for latex, unicode_char in LATEX_TO_UNICODE:
        # `latex` is already a regex that matches a single literal backslash
        # plus the command name, so it is embedded verbatim in a raw-string
        # literal. Re-escaping it (as the str.replace variant did) makes the
        # generated code look for TWO backslashes, which never occurs in real
        # model output.
        #
        # The negative lookahead stops a short command from eating the front
        # of a longer one: without it `\in` turns `\int` into "∈t" and `\sim`
        # turns `\simeq` into "∼eq".
        #
        # The generated code relies on `re` being importable in the target
        # module, which already calls re.sub in the function being patched.
        lines.append(
            f"{indent}content = re.sub(r'{latex}(?![A-Za-z])', '{unicode_char}', content)"
        )
    return "\n".join(lines)
118
+
119
+
120
  def patch_run_agent(filepath: str) -> bool:
121
+ """Add <|channel>thinking / <channel|> patterns + LaTeX cleanup to run_agent.py."""
122
  with open(filepath, "r") as f:
123
  content = f.read()
124
 
125
  if "<|channel" in content and "_strip_think_blocks" in content:
126
  # Check if already patched by looking for our specific addition
127
+ if '<|channel>thought' in content or ('<|' in content and 'hermes-bot patch' in content):
128
+ print(f" run_agent.py thinking-tags already patched")
129
+
 
130
  applied = False
131
 
132
  # ── Patch 1a: Add closed-tag regex for <|channel>thought ... <channel|> ──
 
 
133
  old_thought = "content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)"
134
  new_thought = (
135
  old_thought
 
138
  + "\n content = re.sub(r'<\\|channel\\|>thought.*?<\\|channel\\|>', '', content, flags=re.DOTALL | re.IGNORECASE)"
139
  + "\n content = re.sub(r'<\\|[^|>]*\\|>.*?<\\|/[^|>]*\\|>', '', content, flags=re.DOTALL | re.IGNORECASE)"
140
  )
141
+ if old_thought in content and 'hermes-bot patch: <|channel' not in content:
142
  content = content.replace(old_thought, new_thought, 1)
143
  applied = True
144
  print(" [run_agent.py] Added closed-tag regex for <|...> thinking blocks")
145
 
146
  # ── Patch 1b: Add unterminated open-tag pattern ──
 
147
  old_unterm = (
148
  "r'(?:^|\\n)[ \\t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\\b[^>]*>.*$'"
149
  )
 
151
  "r'(?:^|\\n)[ \\t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\\b[^>]*>.*$'"
152
  + "\n + r'|(?:^|\\n)[ \\t]*<\\|[^|>]*>.*$'" # <|channel>thought
153
  )
154
+ if old_unterm in content and '<\\|[^|>]*>' not in content.split("def _has_natural_response_ending")[0].split("def _strip_think_blocks")[1]:
155
  content = content.replace(old_unterm, new_unterm, 1)
156
  applied = True
157
  print(" [run_agent.py] Extended unterminated-tag regex for <|...> variants")
158
 
159
  # ── Patch 1c: Add stray orphan tag cleanup ──
 
160
  old_stray = (
161
  "r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\\s*'"
162
  )
 
164
  "r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\\s*'"
165
  + "\n + r'|<\\|[^|>]*\\|>\\s*'" # stray <|channel|> or <channel|>
166
  )
167
+ if old_stray in content and '<\\|[^|>]*\\|>\\s*' not in content.split("def _has_natural_response_ending")[0].split("def _strip_think_blocks")[1]:
168
  content = content.replace(old_stray, new_stray, 1)
169
  applied = True
170
  print(" [run_agent.py] Extended stray-tag cleanup for <|...> variants")
171
 
172
+ # ── Patch 1d: Add LaTeX -> Unicode conversion after _strip_think_blocks ──
173
+ # Insert right before the final "return content" in _strip_think_blocks
174
+ latex_marker = " # Hermes Bot patch: LaTeX -> Unicode conversion"
175
+ if latex_marker not in content:
176
+ # Find the return statement at end of _strip_think_blocks
177
+ # Look for "return content" that's indented with spaces (inside a function)
178
+ import_pattern = " # Hermes Bot patch: LaTeX -> Unicode cleanup\n"
179
+ import_pattern += _build_latex_replacements()
180
+
181
+ # Try multiple patterns for where to insert
182
+ return_patterns = [
183
+ " return content\n def ",
184
+ " return content\n\n def ",
185
+ ]
186
+ for pat in return_patterns:
187
+ if pat in content:
188
+ content = content.replace(pat, import_pattern + "\n" + pat, 1)
189
+ applied = True
190
+ print(" [run_agent.py] Added LaTeX -> Unicode conversion")
191
+ break
192
+
193
+ # Fallback: insert before any "return content" in the function
194
+ if 'LaTeX -> Unicode' not in content:
195
+ # Use a simpler approach: find the function end
196
+ idx = content.find("def _strip_think_blocks")
197
+ if idx > 0:
198
+ # Find the next function definition after _strip_think_blocks
199
+ next_def = content.find("\n def ", idx + 10)
200
+ if next_def > 0:
201
+ # Find the last "return content" before next_def
202
+ search_area = content[idx:next_def]
203
+ # Find return content
204
+ ret_idx = search_area.rfind("return content")
205
+ if ret_idx > 0:
206
+ abs_ret_idx = idx + ret_idx
207
+ insert_pos = content.rfind("\n", 0, abs_ret_idx) + 1
208
+ content = content[:insert_pos] + import_pattern + "\n" + content[insert_pos:]
209
+ applied = True
210
+ print(" [run_agent.py] Added LaTeX -> Unicode conversion (fallback)")
211
+
212
+ elif latex_marker in content:
213
+ print(" [run_agent.py] LaTeX conversion already patched")
214
+
215
  if applied:
216
  with open(filepath, "w") as f:
217
  f.write(content)
218
  return True
219
  else:
220
+ # Even if no new patches applied, existing ones may be present
221
+ if 'hermes-bot patch' in content:
222
+ print(f" run_agent.py: existing patches found, no new changes needed")
223
+ return True
224
  print(f" WARNING: Could not patch {filepath}", file=sys.stderr)
225
  return False
226
 
 
236
  old_open = ' _OPEN_THINK_TAGS = (\n "<REASONING_SCRATCHPAD>", "\U0001f9ae", "<reasoning>",\n "<THINKING>", "<thinking>", "<thought>",\n )'
237
  new_open = ' _OPEN_THINK_TAGS = (\n "<REASONING_SCRATCHPAD>", "\U0001f9ae", "<reasoning>",\n "<THINKING>", "<thinking>", "<thought>",\n # Hermes Bot patch: <|...> thinking variants\n "<|channel>thought", "<|channel|>",\n )'
238
 
239
+ if old_open in content and '<|channel>thought' not in content:
240
  content = content.replace(old_open, new_open, 1)
241
  applied = True
242
  print(" [stream_consumer.py] Added <|channel>thought to _OPEN_THINK_TAGS")
 
245
  old_close = ' _CLOSE_THINK_TAGS = (\n "</REASONING_SCRATCHPAD>", "\U0001f9d0", "</reasoning>",\n "</THINKING>", "</thinking>", "</thought>",\n )'
246
  new_close = ' _CLOSE_THINK_TAGS = (\n "</REASONING_SCRATCHPAD>", "\U0001f9d0", "</reasoning>",\n "</THINKING>", "</thinking>", "</thought>",\n # Hermes Bot patch: <|...> thinking variants\n "<channel|>",\n )'
247
 
248
+ if old_close in content and '<channel|>' not in content.split('_CLOSE_THINK_TAGS')[1].split('\n\n')[0]:
249
  content = content.replace(old_close, new_close, 1)
250
  applied = True
251
  print(" [stream_consumer.py] Added <channel|> to _CLOSE_THINK_TAGS")
 
255
  f.write(content)
256
  return True
257
  else:
258
+ if '<|channel>thought' in content or '<channel|>' in content:
259
+ print(" stream_consumer.py: existing patches found, no new changes needed")
260
+ return True
261
  print(f" WARNING: Could not patch {filepath}", file=sys.stderr)
262
  return False
263