Z User committed on
Commit
f52e0ed
·
1 Parent(s): 51134db

修复:过滤模型思考标签 <|channel>thought / <channel|> 泄漏到用户界面

Browse files

问题:某些模型输出的内部思考标签 <|channel>thought ... <channel|>
未被 hermes-agent 现有的 _strip_think_blocks() 过滤,导致这些标签
以独立消息的形式出现在微信/飞书用户界面中。

修复:
- 新增 patch_strip_thinking_tags.py 补丁脚本
- 扩展 run_agent.py 的 _strip_think_blocks() 方法:
添加 <|channel>thought...<channel|> 闭合标签对匹配
添加 <|...> 未终止标签匹配
添加 <|...> 散落标签清理
- 扩展 gateway/stream_consumer.py 的流式过滤:
向 _OPEN_THINK_TAGS 添加 <|channel>thought, <|channel|>
向 _CLOSE_THINK_TAGS 添加 <channel|>
- Dockerfile 构建时应用补丁
- start.sh 自动更新后重新应用补丁

Files changed (3) hide show
  1. Dockerfile +4 -0
  2. scripts/patch_strip_thinking_tags.py +164 -0
  3. start.sh +3 -0
Dockerfile CHANGED
@@ -46,6 +46,10 @@ RUN python3 /tmp/patch_resolve_media_paths.py; rm -f /tmp/patch_resolve_media_pa
46
  COPY scripts/patch_weixin_cross_loop.py /tmp/patch_weixin_cross_loop.py
47
  RUN python3 /tmp/patch_weixin_cross_loop.py; rm -f /tmp/patch_weixin_cross_loop.py
48
 
 
 
 
 
49
  # Patch: DuckDuckGo free fallback for web_search (no API key needed)
50
  COPY scripts/patch_web_search_fallback.py /tmp/patch_web_search_fallback.py
51
  RUN python3 /tmp/patch_web_search_fallback.py; rm -f /tmp/patch_web_search_fallback.py
 
46
  COPY scripts/patch_weixin_cross_loop.py /tmp/patch_weixin_cross_loop.py
47
  RUN python3 /tmp/patch_weixin_cross_loop.py; rm -f /tmp/patch_weixin_cross_loop.py
48
 
49
+ # Patch: Strip <|channel>thought / <channel|> model tags from user-visible output
50
+ COPY scripts/patch_strip_thinking_tags.py /tmp/patch_strip_thinking_tags.py
51
+ RUN python3 /tmp/patch_strip_thinking_tags.py; rm -f /tmp/patch_strip_thinking_tags.py
52
+
53
  # Patch: DuckDuckGo free fallback for web_search (no API key needed)
54
  COPY scripts/patch_web_search_fallback.py /tmp/patch_web_search_fallback.py
55
  RUN python3 /tmp/patch_web_search_fallback.py; rm -f /tmp/patch_web_search_fallback.py
scripts/patch_strip_thinking_tags.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Patch hermes-agent to strip <|channel>thought / <channel|> thinking tags.
3
+
4
+ PROBLEM: Some models output internal thinking tokens in the format
5
+ <|channel>thought ... <channel|>
6
+ that leak into user-facing messages on WeChat/Feishu. The existing
7
+ _strip_think_blocks() in run_agent.py handles <thought>, <thinking>,
8
+ <reasoning> etc., but NOT this <|...> pipe-delimited variant.
9
+
10
+ FIX: Extend both the agent-level tag stripper AND the gateway stream
11
+ consumer to recognise and suppress these tags.
12
+
13
+ Files patched:
14
+ 1. run_agent.py — _strip_think_blocks() + stray-tag cleanup
15
+ 2. gateway/stream_consumer.py — _OPEN_THINK_TAGS / _CLOSE_THINK_TAGS
16
+ """
17
+
18
+ import sys
19
+ import os
20
+ import glob
21
+ import re
22
+
23
+
24
def _leading_whitespace(text: str, pos: int) -> str:
    """Return the indentation of the line in *text* that contains offset *pos*."""
    line_start = text.rfind("\n", 0, pos) + 1
    prefix = text[line_start:pos]
    return prefix[: len(prefix) - len(prefix.lstrip(" \t"))]


def _append_after_anchor(text: str, anchor: str, extra_lines, probe: str):
    """Insert *extra_lines* directly after the first occurrence of *anchor*.

    Args:
        text: Source code being patched.
        anchor: Exact code fragment after which the new lines are inserted.
        extra_lines: Lines to insert (without indentation or newlines).
        probe: Fragment of the inserted code used to detect an earlier run.

    Returns:
        ``(text, status)`` where status is ``"present"`` if *probe* is already
        in *text*, ``"missing"`` if *anchor* is not found, or ``"applied"``
        if the lines were inserted.
    """
    # Check the probe first: the anchor is still present after patching, so
    # testing only for the anchor would insert the same lines on every run.
    if probe in text:
        return text, "present"
    pos = text.find(anchor)
    if pos < 0:
        return text, "missing"
    # Reuse the anchor line's indentation so inserted statements stay valid
    # Python whatever nesting level the anchor lives at.
    indent = _leading_whitespace(text, pos)
    addition = "".join(f"\n{indent}{line}" for line in extra_lines)
    end = pos + len(anchor)
    return text[:end] + addition + text[end:], "applied"


def patch_run_agent(filepath: str) -> bool:
    """Add <|channel>thought / <channel|> patterns to _strip_think_blocks.

    Three independent text patches are attempted on the file:
      1a. extra ``re.sub`` calls for closed ``<|...>`` tag pairs, inserted
          after the existing ``<thought>...</thought>`` substitution;
      1b. an extra alternative appended to the unterminated-tag regex;
      1c. an extra alternative appended to the stray-tag cleanup regex.

    The function is idempotent: a patch whose inserted code is already in the
    file is skipped, so running the script again leaves the file unchanged.

    Args:
        filepath: Path to the ``run_agent.py`` file to patch in place.

    Returns:
        True if at least one patch was applied or the file was already
        patched; False if none of the anchors were found.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    # The literal (unescaped) tag in a file that defines the stripper is
    # treated as "already handled"; our own insertions only contain the
    # regex-escaped form, so they are detected by the per-patch probes below.
    if "<|channel>thought" in content and "_strip_think_blocks" in content:
        print(" run_agent.py already patched, skipping")
        return True

    # (anchor, lines inserted after it, probe for an earlier run, log message)
    steps = (
        (
            "content = re.sub(r'<thought>.*?</thought>', '', content, flags=re.DOTALL | re.IGNORECASE)",
            (
                "# Hermes Bot patch: <|channel>thinking / <channel|> variants",
                r"content = re.sub(r'<\|channel>thought.*?<channel\|>', '', content, flags=re.DOTALL | re.IGNORECASE)",
                r"content = re.sub(r'<\|channel\|>thought.*?<\|channel\|>', '', content, flags=re.DOTALL | re.IGNORECASE)",
                r"content = re.sub(r'<\|[^|>]*\|>.*?<\|/[^|>]*\|>', '', content, flags=re.DOTALL | re.IGNORECASE)",
            ),
            r"re.sub(r'<\|channel>thought.*?<channel\|>'",
            " [run_agent.py] Added closed-tag regex for <|...> thinking blocks",
        ),
        (
            r"r'(?:^|\n)[ \t]*<(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)\b[^>]*>.*$'",
            # Appended as a string concatenation, so it relies on the anchor
            # sitting inside a parenthesised call in the target file.
            (r"+ r'|(?:^|\n)[ \t]*<\|[^|>]*>.*$'",),
            r"+ r'|(?:^|\n)[ \t]*<\|[^|>]*>.*$'",
            " [run_agent.py] Extended unterminated-tag regex for <|...> variants",
        ),
        (
            r"r'</?(?:think|thinking|reasoning|thought|REASONING_SCRATCHPAD)>\s*'",
            # NOTE(review): this matches tags with pipes on both sides, such
            # as <|channel|>; a lone <channel|> is not matched - confirm intent.
            (r"+ r'|<\|[^|>]*\|>\s*'",),
            r"+ r'|<\|[^|>]*\|>\s*'",
            " [run_agent.py] Extended stray-tag cleanup for <|...> variants",
        ),
    )

    statuses = []
    for anchor, extra_lines, probe, message in steps:
        content, status = _append_after_anchor(content, anchor, extra_lines, probe)
        if status == "applied":
            print(message)
        statuses.append(status)

    if "applied" in statuses:
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)
        return True
    if "present" in statuses:
        print(" run_agent.py already patched, skipping")
        return True

    print(f" WARNING: Could not patch {filepath}", file=sys.stderr)
    return False
88
+
89
+
90
def _extend_tag_tuple(text: str, name: str, entries):
    """Append string *entries* to the multi-line tuple assigned to *name*.

    The tuple is located by name rather than by its exact contents. It must
    be laid out with the closing parenthesis on its own line and a trailing
    comma after the last element; any other layout is left untouched.

    Args:
        text: Source code being patched.
        name: Name of the tuple variable, e.g. ``_OPEN_THINK_TAGS``.
        entries: Tag strings that must be present in the tuple.

    Returns:
        ``(text, status)`` where status is ``"present"`` if every entry is
        already in the tuple, ``"missing"`` if the tuple was not found or has
        an unsupported layout, or ``"applied"`` if entries were appended.
    """
    layout = re.compile(
        r"^[ \t]*" + re.escape(name)
        + r"[ \t]*=[ \t]*\((?P<body>[^()]*?)\n(?P<close>[ \t]*)\)",
        re.MULTILINE,
    )
    found = layout.search(text)
    if found is None:
        return text, "missing"

    body = found.group("body")
    wanted = [tag for tag in entries if f'"{tag}"' not in body]
    if not wanted:
        return text, "present"
    # Appending after an element with no trailing comma (or after a trailing
    # comment) would produce broken code, so refuse rather than guess.
    if not body.rstrip().endswith(","):
        return text, "missing"

    if "\n" in body:
        # Indent the new lines like the tuple's last existing line.
        last_line = body.rsplit("\n", 1)[-1]
        indent = last_line[: len(last_line) - len(last_line.lstrip(" \t"))]
    else:
        indent = found.group("close") + "    "
    addition = (
        f"\n{indent}# Hermes Bot patch: <|...> thinking variants"
        f"\n{indent}" + " ".join(f'"{tag}",' for tag in wanted)
    )
    cut = found.end("body")
    return text[:cut] + addition + text[cut:], "applied"


def patch_stream_consumer(filepath: str) -> bool:
    """Add <|channel>thought / <channel|> to stream consumer think-tag lists.

    ``_OPEN_THINK_TAGS`` gains ``"<|channel>thought"`` and ``"<|channel|>"``;
    ``_CLOSE_THINK_TAGS`` gains ``"<channel|>"``. The function is idempotent:
    tags already in a tuple are not added again, and a fully patched file is
    reported as success instead of a failure.

    Args:
        filepath: Path to the ``stream_consumer.py`` file to patch in place.

    Returns:
        True if at least one tuple was extended or the tags were already
        present; False if neither tuple could be patched.
    """
    with open(filepath, "r", encoding="utf-8") as f:
        content = f.read()

    # (tuple name, tags to add, log message)
    steps = (
        (
            "_OPEN_THINK_TAGS",
            ("<|channel>thought", "<|channel|>"),
            " [stream_consumer.py] Added <|channel>thought to _OPEN_THINK_TAGS",
        ),
        (
            "_CLOSE_THINK_TAGS",
            ("<channel|>",),
            " [stream_consumer.py] Added <channel|> to _CLOSE_THINK_TAGS",
        ),
    )

    statuses = {}
    for name, tags, message in steps:
        content, statuses[name] = _extend_tag_tuple(content, name, tags)
        if statuses[name] == "applied":
            print(message)

    outcomes = set(statuses.values())
    if outcomes == {"missing"}:
        print(f" WARNING: Could not patch {filepath}", file=sys.stderr)
        return False

    # An open tag without its close tag (or vice versa) leaves the stream
    # filter half-configured, so report a partial patch explicitly.
    for name, status in statuses.items():
        if status == "missing":
            print(
                f" WARNING: {name} not patched in {filepath}; "
                "think-tag filtering may be incomplete",
                file=sys.stderr,
            )

    if "applied" in outcomes:
        with open(filepath, "w", encoding="utf-8") as f:
            f.write(content)
    else:
        print(" stream_consumer.py already patched, skipping")
    return True
122
+
123
+
124
if __name__ == "__main__":
    # One row per target: (label, fixed install path, venv glob, patch function).
    targets = (
        (
            "run_agent.py",
            "/app/hermes-agent/run_agent.py",
            "/app/venv/lib/**/run_agent.py",
            patch_run_agent,
        ),
        (
            "stream_consumer.py",
            "/app/hermes-agent/gateway/stream_consumer.py",
            "/app/venv/lib/**/gateway/stream_consumer.py",
            patch_stream_consumer,
        ),
    )

    outcome = {}
    searched = {}
    for label, fixed_path, venv_glob, patcher in targets:
        paths = [fixed_path, *glob.glob(venv_glob, recursive=True)]
        searched[label] = paths
        # Only the first candidate that exists on disk is patched.
        hit = next((path for path in paths if os.path.isfile(path)), None)
        if hit is not None:
            outcome[label] = patcher(hit)

    if not outcome:
        # Nothing to patch is reported on stderr but is not treated as an error.
        print("WARNING: No target files found to patch", file=sys.stderr)
        print(f" run_agent.py checked: {searched['run_agent.py']}", file=sys.stderr)
        print(f" stream_consumer.py checked: {searched['stream_consumer.py']}", file=sys.stderr)
        sys.exit(0)

    if not all(outcome.values()):
        print(f"Some patches failed: {outcome}", file=sys.stderr)
        sys.exit(1)

    print(f"All patches applied successfully: {list(outcome)}")
start.sh CHANGED
@@ -551,6 +551,9 @@ update_hermes_agent_background() {
551
  if [ -f "/app/scripts/patch_web_search_fallback.py" ]; then
552
  python3 /app/scripts/patch_web_search_fallback.py 2>/dev/null
553
  fi
 
 
 
554
  # Copy patch files if they exist
555
  for patch_file in prompt_builder.py send_message_tool.py; do
556
  if [ -f "/app/patches/hermes-agent/agent/$patch_file" ] && [ -f "$AGENT_DIR/agent/$patch_file" ]; then
 
551
  if [ -f "/app/scripts/patch_web_search_fallback.py" ]; then
552
  python3 /app/scripts/patch_web_search_fallback.py 2>/dev/null
553
  fi
554
+ if [ -f "/app/scripts/patch_strip_thinking_tags.py" ]; then
555
+ python3 /app/scripts/patch_strip_thinking_tags.py 2>/dev/null
556
+ fi
557
  # Copy patch files if they exist
558
  for patch_file in prompt_builder.py send_message_tool.py; do
559
  if [ -f "/app/patches/hermes-agent/agent/$patch_file" ] && [ -f "$AGENT_DIR/agent/$patch_file" ]; then