akseljoonas (HF Staff) committed
Commit c85e4f2 · 1 Parent(s): 887da19

Improve tool output quality: ANSI stripping, tail-biased truncation, noise suppression


- Strip ANSI escape sequences from bash/job output (sandbox, local, jobs)
- Switch to tail-biased truncation (25/75 head/tail split) so errors and
final results are preserved instead of setup noise
- Spill full output to temp file when truncated, with metadata telling
the agent how to read specific sections via offset/limit (see the sketch below)
- Add UV_NO_PROGRESS=1 and PYTHONWARNINGS env vars to suppress install
progress and deprecation warnings at source
- Add atomic file writes, fuzzy edit matching, and Python validation
to sandbox server and local tools
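
For reviewers, a rough sketch of the intended truncation behavior, using the
_truncate_output helper this commit adds to agent/tools/local_tools.py (the
sample strings and the exact marker wording below are illustrative, not taken
from a real run):

    from agent.tools.local_tools import _truncate_output

    # A large block of install noise followed by the part that actually matters.
    logs = "Collecting torch (setup noise the agent rarely needs)\n" * 2000
    logs += "Traceback (most recent call last):\n  ...\nValueError: bad learning rate"

    shown = _truncate_output(logs, max_chars=25_000)
    # head budget = int(25_000 * 0.25) = 6_250 chars, tail budget = 18_750 chars,
    # so the traceback at the end survives. The omitted middle is replaced by a
    # marker noting how many chars were dropped and the temp file path holding
    # the full output, which the agent can re-read with offset/limit.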

agent/tools/edit_utils.py ADDED
@@ -0,0 +1,280 @@
+"""
+Shared utilities for file editing tools — fuzzy matching, syntax validation,
+and richer edit operations.
+
+Used by both local_tools.py and the embedded sandbox server.
+"""
+
+from __future__ import annotations
+
+# ── Unicode normalization map ────────────────────────────────────────────
+
+UNICODE_MAP = {
+    "\u2013": "-",  # en-dash
+    "\u2014": "-",  # em-dash
+    "\u2212": "-",  # minus sign
+    "\u2018": "'",  # left single quote
+    "\u2019": "'",  # right single quote
+    "\u201c": '"',  # left double quote
+    "\u201d": '"',  # right double quote
+    "\u00a0": " ",  # non-breaking space
+    "\u2003": " ",  # em space
+    "\u2002": " ",  # en space
+    "\u200b": "",  # zero-width space
+    "\ufeff": "",  # BOM
+}
+
+
+def _normalize_unicode(s: str) -> str:
+    return "".join(UNICODE_MAP.get(c, c) for c in s)
+
+
+# ── 4-pass fuzzy matching ────────────────────────────────────────────────
+
+
+def fuzzy_find(content: str, pattern: str) -> tuple[int | None, str | None]:
+    """Find *pattern* in *content* with increasingly relaxed matching.
+
+    Returns (start_index_in_original_content, match_note) or (None, None).
+    The index always refers to the *original* content string so callers can
+    use ``content[idx : idx + len(matched_text)]`` for replacement.
+
+    Strategy (mirrors Codex):
+    1. Exact match
+    2. Right-trim each line (trailing whitespace)
+    3. Both-sides trim (all surrounding whitespace per line)
+    4. Unicode normalization on top of both-sides trim
+    """
+    # Pass 1 — exact
+    if pattern in content:
+        return content.index(pattern), None
+
+    # Helper: build a line-stripped version *and* a mapping from stripped
+    # positions back to original positions. We need this so callers can
+    # apply the replacement on the original content, not the stripped copy.
+
+    def _build_stripped(text: str, strip_fn):
+        """Return (stripped_text, line_start_map).
+
+        line_start_map[i] = original byte offset of the start of line i.
+        """
+        orig_lines = text.split("\n")
+        stripped_lines = [strip_fn(l) for l in orig_lines]
+        return "\n".join(stripped_lines), orig_lines, stripped_lines
+
+    # Pass 2 — right-trim
+    c_rt, c_orig_lines, c_rt_lines = _build_stripped(content, str.rstrip)
+    p_rt = "\n".join(l.rstrip() for l in pattern.split("\n"))
+    idx = c_rt.find(p_rt)
+    if idx != -1:
+        orig_idx = _map_back(idx, c_orig_lines, c_rt_lines)
+        return orig_idx, "(matched after trimming trailing whitespace)"
+
+    # Pass 3 — both-sides trim
+    c_st, _, c_st_lines = _build_stripped(content, str.strip)
+    p_st = "\n".join(l.strip() for l in pattern.split("\n"))
+    idx = c_st.find(p_st)
+    if idx != -1:
+        orig_idx = _map_back(idx, c_orig_lines, c_st_lines)
+        return orig_idx, "(matched after trimming whitespace)"
+
+    # Pass 4 — unicode normalization + both-sides trim
+    c_norm = _normalize_unicode(c_st)
+    p_norm = _normalize_unicode(p_st)
+    idx = c_norm.find(p_norm)
+    if idx != -1:
+        orig_idx = _map_back(idx, c_orig_lines, c_st_lines)
+        return orig_idx, "(matched after unicode normalization)"
+
+    return None, None
+
+
+def _map_back(
+    stripped_idx: int,
+    orig_lines: list[str],
+    stripped_lines: list[str],
+) -> int:
+    """Map a character index in the stripped/joined text back to the original text."""
+    # Walk through stripped lines to find which line the index falls on
+    pos = 0
+    for i, sl in enumerate(stripped_lines):
+        line_end = pos + len(sl)
+        if stripped_idx <= line_end:
+            col_in_stripped = stripped_idx - pos
+            # Find where this stripped line's content starts in the original line
+            ol = orig_lines[i]
+            # The stripped line is a subset of the original line; find its offset
+            lstripped = len(ol) - len(ol.lstrip())
+            orig_col = lstripped + col_in_stripped
+            # Compute absolute position in original text
+            orig_pos = sum(len(orig_lines[j]) + 1 for j in range(i)) + orig_col
+            return orig_pos
+        pos = line_end + 1  # +1 for the \n
+    # Fallback: return 0 (shouldn't happen if idx is valid)
+    return 0
+
+
+def fuzzy_find_original_match(content: str, pattern: str) -> tuple[str | None, str | None]:
+    """Find the *original* text in content that matches pattern fuzzily.
+
+    Returns (original_matched_text, match_note) or (None, None).
+    This extracts the exact substring from the original content that
+    corresponds to the fuzzy match, preserving its original whitespace/unicode.
+    """
+    if pattern in content:
+        return pattern, None
+
+    idx, note = fuzzy_find(content, pattern)
+    if idx is None:
+        return None, None
+
+    # We need to find the original text span that corresponds to the match.
+    # The match covers len(pattern) worth of *logical* content.
+    # Count how many original lines the pattern spans.
+    pattern_lines = pattern.split("\n")
+    n_lines = len(pattern_lines)
+
+    # Find which original line the match starts on
+    orig_lines = content.split("\n")
+    char_pos = 0
+    start_line = 0
+    for i, ol in enumerate(orig_lines):
+        if char_pos + len(ol) >= idx:
+            start_line = i
+            break
+        char_pos += len(ol) + 1
+
+    end_line = min(start_line + n_lines, len(orig_lines))
+    # Extract the original lines that were matched
+    matched_lines = orig_lines[start_line:end_line]
+    original_text = "\n".join(matched_lines)
+    return original_text, note
+
+
+# ── Richer edit operations ───────────────────────────────────────────────
+
+
+def apply_edit(
+    content: str,
+    old_str: str,
+    new_str: str,
+    mode: str = "replace",
+    replace_all: bool = False,
+) -> tuple[str, int, str | None]:
+    """Apply an edit operation to content.
+
+    Modes:
+    - replace: replace first occurrence (or all if replace_all=True)
+    - replace_all: replace all occurrences (alias)
+    - append_after: insert new_str after old_str
+    - prepend_before: insert new_str before old_str
+
+    Returns (new_content, num_replacements, fuzzy_note).
+    Raises ValueError if old_str not found.
+    """
+    if mode == "replace_all":
+        replace_all = True
+        mode = "replace"
+
+    # Try exact match first, then fuzzy
+    fuzzy_note = None
+    if old_str not in content:
+        original_match, fuzzy_note = fuzzy_find_original_match(content, old_str)
+        if original_match is None:
+            raise ValueError("old_str not found in file.")
+        old_str = original_match
+
+    count = content.count(old_str)
+
+    if mode == "replace":
+        if count > 1 and not replace_all:
+            raise ValueError(
+                f"old_str appears {count} times. Use replace_all=true to replace all, "
+                "or provide a more specific old_str."
+            )
+        if replace_all:
+            new_content = content.replace(old_str, new_str)
+            return new_content, count, fuzzy_note
+        else:
+            new_content = content.replace(old_str, new_str, 1)
+            return new_content, 1, fuzzy_note
+
+    elif mode == "append_after":
+        if replace_all:
+            new_content = content.replace(old_str, old_str + new_str)
+            return new_content, count, fuzzy_note
+        else:
+            idx = content.index(old_str) + len(old_str)
+            new_content = content[:idx] + new_str + content[idx:]
+            return new_content, 1, fuzzy_note
+
+    elif mode == "prepend_before":
+        if replace_all:
+            new_content = content.replace(old_str, new_str + old_str)
+            return new_content, count, fuzzy_note
+        else:
+            idx = content.index(old_str)
+            new_content = content[:idx] + new_str + content[idx:]
+            return new_content, 1, fuzzy_note
+
+    else:
+        raise ValueError(f"Unknown edit mode: {mode}. Use replace, append_after, or prepend_before.")
+
+
+# ── Syntax validation (Python) ───────────────────────────────────────────
+
+
+def validate_python(content: str, path: str = "") -> list[str]:
+    """Lightweight post-write validation for Python files.
+
+    Returns a list of warning strings (empty = all good).
+    Never raises — validation failures are advisory only.
+    """
+    import ast
+    import importlib
+
+    warnings = []
+
+    # 1. Syntax check via ast.parse
+    try:
+        tree = ast.parse(content)
+    except SyntaxError as e:
+        warnings.append(f"Python syntax error at line {e.lineno}: {e.msg}")
+        return warnings  # can't do import checks on broken syntax
+
+    # 2. Validate imports resolve
+    for node in ast.walk(tree):
+        if isinstance(node, ast.ImportFrom):
+            if node.module:
+                try:
+                    mod = importlib.import_module(node.module)
+                    for alias in node.names:
+                        if alias.name != "*" and not hasattr(mod, alias.name):
+                            warnings.append(
+                                f"Import warning: '{alias.name}' not found in '{node.module}' (line {node.lineno})"
+                            )
+                except ImportError as e:
+                    warnings.append(f"Import error: {e} (line {node.lineno})")
+                except Exception:
+                    pass  # skip non-importable modules (e.g. project-local)
+        elif isinstance(node, ast.Import):
+            for alias in node.names:
+                try:
+                    importlib.import_module(alias.name)
+                except ImportError as e:
+                    warnings.append(f"Import error: {e} (line {node.lineno})")
+                except Exception:
+                    pass
+
+    # 3. Training script heuristics
+    if any(kw in content for kw in ("TrainingArguments", "SFTConfig", "DPOConfig", "GRPOConfig")):
+        if "push_to_hub" not in content:
+            warnings.append(
+                "Training script warning: no 'push_to_hub' found — model may be lost when job ends"
+            )
+        if "hub_model_id" not in content:
+            warnings.append(
+                "Training script warning: no 'hub_model_id' found"
+            )
+
+    return warnings
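
A quick usage sketch of the new apply_edit helper (an illustrative call, not
part of the diff; it assumes the module is importable as agent.tools.edit_utils,
matching the import used in local_tools.py below):

    from agent.tools.edit_utils import apply_edit

    src = "args = parser.parse_args()\nmain(args)\n"
    new_text, n, note = apply_edit(
        src,
        old_str="args = parser.parse_args()",
        new_str="\nprint(args)",   # inserted text; old_str is kept in append_after mode
        mode="append_after",
    )
    # new_text == "args = parser.parse_args()\nprint(args)\nmain(args)\n", n == 1,
    # and note is None because the exact match succeeded (no fuzzy fallback needed).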
agent/tools/jobs_tool.py CHANGED
@@ -117,11 +117,19 @@ def _filter_uv_install_output(logs: list[str]) -> list[str]:
     return logs
 
 
+_ANSI_RE = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]|\x1b\].*?\x07')
+
+
+def _strip_ansi(text: str) -> str:
+    return _ANSI_RE.sub('', text)
+
+
 _DEFAULT_ENV = {
     "HF_HUB_DISABLE_PROGRESS_BARS": "1",
     "TQDM_DISABLE": "1",
     "TRANSFORMERS_VERBOSITY": "warning",
     "HF_HUB_ENABLE_HF_TRANSFER": "1",
+    "UV_NO_PROGRESS": "1",
 }
 
 
@@ -580,7 +588,7 @@ class HfJobsTool:
         filtered_logs = _filter_uv_install_output(all_logs)
 
         # Format all logs for the agent
-        log_text = "\n".join(filtered_logs) if filtered_logs else "(no logs)"
+        log_text = _strip_ansi("\n".join(filtered_logs)) if filtered_logs else "(no logs)"
 
         response = f"""{job_type} job completed!
 
@@ -657,7 +665,7 @@ class HfJobsTool:
                 "resultsShared": 0,
            }
 
-        log_text = "\n".join(logs)
+        log_text = _strip_ansi("\n".join(logs))
        return {
            "formatted": f"**Logs for {job_id}:**\n\n```\n{log_text}\n```",
            "totalResults": 1,
agent/tools/local_tools.py CHANGED
@@ -8,18 +8,78 @@ subprocess/pathlib instead of going through a remote sandbox.
 
 from __future__ import annotations
 
+import os
+import re
 import subprocess
+import tempfile
 from pathlib import Path
 from typing import Any
 
 from agent.tools.sandbox_client import Sandbox
 
-MAX_OUTPUT_CHARS = 30_000
+MAX_OUTPUT_CHARS = 25_000
 MAX_LINE_LENGTH = 2000
 DEFAULT_READ_LINES = 2000
 DEFAULT_TIMEOUT = 120
 MAX_TIMEOUT = 600
 
+_ANSI_RE = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]|\x1b\].*?\x07')
+
+
+def _atomic_write(path: Path, content: str) -> None:
+    """Write file atomically via temp file + os.replace().
+
+    Ensures the file is never left in a partial/corrupted state — it's either
+    the old content or the new content, never half-written.
+    """
+    path.parent.mkdir(parents=True, exist_ok=True)
+    fd = None
+    tmp_path = None
+    try:
+        fd, tmp_path = tempfile.mkstemp(dir=path.parent, suffix=".tmp")
+        os.write(fd, content.encode("utf-8"))
+        os.fsync(fd)
+        os.close(fd)
+        fd = None
+        os.replace(tmp_path, str(path))
+        tmp_path = None  # successfully replaced, nothing to clean up
+    finally:
+        if fd is not None:
+            os.close(fd)
+        if tmp_path is not None:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+
+
+def _strip_ansi(text: str) -> str:
+    return _ANSI_RE.sub('', text)
+
+
+def _truncate_output(output: str, max_chars: int = MAX_OUTPUT_CHARS, head_ratio: float = 0.25) -> str:
+    """Tail-biased truncation with temp file spillover for full output access."""
+    if len(output) <= max_chars:
+        return output
+    # Write full output to temp file so LLM can read specific sections
+    spill_path = None
+    try:
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', prefix='bash_output_', delete=False) as f:
+            f.write(output)
+            spill_path = f.name
+    except Exception:
+        pass
+    head_budget = int(max_chars * head_ratio)
+    tail_budget = max_chars - head_budget
+    head = output[:head_budget]
+    tail = output[-tail_budget:]
+    total = len(output)
+    omitted = total - max_chars
+    meta = f"\n\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\n"
+    if spill_path:
+        meta += f"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\n"
+    return head + meta + tail
+
 
 # ── Handlers ────────────────────────────────────────────────────────────
 
@@ -38,9 +98,8 @@ async def _bash_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
         cwd=work_dir,
         timeout=timeout,
     )
-    output = result.stdout + result.stderr
-    if len(output) > MAX_OUTPUT_CHARS:
-        output = output[:MAX_OUTPUT_CHARS] + "\n... (output truncated)"
+    output = _strip_ansi(result.stdout + result.stderr)
+    output = _truncate_output(output)
    if not output.strip():
        output = "(no output)"
    return output, result.returncode == 0
@@ -83,18 +142,27 @@ async def _write_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
         return "No path provided.", False
     p = Path(file_path)
     try:
-        p.parent.mkdir(parents=True, exist_ok=True)
-        p.write_text(content)
-        return f"Wrote {len(content)} bytes to {file_path}", True
+        _atomic_write(p, content)
+        msg = f"Wrote {len(content)} bytes to {file_path}"
+        # Syntax validation for Python files
+        if p.suffix == ".py":
+            from agent.tools.edit_utils import validate_python
+            warnings = validate_python(content, file_path)
+            if warnings:
+                msg += "\n\nValidation warnings:\n" + "\n".join(f"  ⚠ {w}" for w in warnings)
+        return msg, True
     except Exception as e:
        return f"write error: {e}", False
 
 
 async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
+    from agent.tools.edit_utils import apply_edit, validate_python
+
     file_path = args.get("path", "")
     old_str = args.get("old_str", "")
     new_str = args.get("new_str", "")
     replace_all = args.get("replace_all", False)
+    mode = args.get("mode", "replace")
 
     if not file_path:
         return "No path provided.", False
@@ -110,23 +178,27 @@ async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
     except Exception as e:
         return f"edit read error: {e}", False
 
-    count = text.count(old_str)
-    if count == 0:
-        return "old_str not found in file.", False
-    if count > 1 and not replace_all:
-        return (
-            f"old_str appears {count} times. Use replace_all=true to replace all, "
-            "or provide a more specific old_str."
-        ), False
+    try:
+        new_text, replacements, fuzzy_note = apply_edit(
+            text, old_str, new_str, mode=mode, replace_all=replace_all
+        )
+    except ValueError as e:
+        return str(e), False
 
-    new_text = text.replace(old_str, new_str) if replace_all else text.replace(old_str, new_str, 1)
     try:
-        p.write_text(new_text)
+        _atomic_write(p, new_text)
     except Exception as e:
         return f"edit write error: {e}", False
 
-    replacements = count if replace_all else 1
-    return f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})", True
+    msg = f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})"
+    if fuzzy_note:
+        msg += f" {fuzzy_note}"
+    # Syntax validation for Python files
+    if p.suffix == ".py":
+        warnings = validate_python(new_text, file_path)
+        if warnings:
+            msg += "\n\nValidation warnings:\n" + "\n".join(f"  ⚠ {w}" for w in warnings)
+    return msg, True
 
 
 # ── Public API ──────────────────────────────────────────────────────────
agent/tools/sandbox_client.py CHANGED
@@ -56,7 +56,7 @@ HARDWARE_OPTIONS = [
     "a10g-large",
     "a100-large",
 ]
-OUTPUT_LIMIT = 30000
+OUTPUT_LIMIT = 25000
 LINE_LIMIT = 2000
 DEFAULT_READ_LIMIT = 2000
 DEFAULT_TIMEOUT = 240
@@ -85,7 +85,9 @@ ENV HOME=/home/user \\
     PIP_USER=1 \\
     HF_HUB_DISABLE_PROGRESS_BARS=1 \\
     TQDM_DISABLE=1 \\
-    HF_HUB_ENABLE_HF_TRANSFER=1
+    HF_HUB_ENABLE_HF_TRANSFER=1 \\
+    UV_NO_PROGRESS=1 \\
+    PYTHONWARNINGS=ignore::DeprecationWarning
 
 WORKDIR /app
 COPY --chown=user . /app
@@ -97,12 +99,61 @@ CMD ["python", "sandbox_server.py"]
 
 _SANDBOX_SERVER = '''\
 """Minimal FastAPI server for sandbox operations."""
-import os, subprocess, pathlib, signal, threading
+import os, subprocess, pathlib, signal, threading, re, tempfile
 from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import Optional
 import uvicorn
 
+_ANSI_RE = re.compile(r'\\x1b\\[[0-9;]*[a-zA-Z]|\\x1b\\].*?\\x07')
+
+def _strip_ansi(text: str) -> str:
+    return _ANSI_RE.sub('', text)
+
+def _truncate_output(output: str, max_chars: int = 25000, head_ratio: float = 0.25) -> str:
+    if len(output) <= max_chars:
+        return output
+    # Write full output to temp file so LLM can read specific sections
+    spill_path = None
+    try:
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', prefix='bash_output_', dir='/tmp', delete=False) as f:
+            f.write(output)
+            spill_path = f.name
+    except Exception:
+        pass
+    head_budget = int(max_chars * head_ratio)
+    tail_budget = max_chars - head_budget
+    head = output[:head_budget]
+    tail = output[-tail_budget:]
+    total = len(output)
+    omitted = total - max_chars
+    meta = f"\\n\\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\\n"
+    if spill_path:
+        meta += f"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\\n"
+    return head + meta + tail
+
+def _atomic_write(path: pathlib.Path, content: str):
+    """Write atomically: temp file + fsync + os.replace."""
+    path.parent.mkdir(parents=True, exist_ok=True)
+    fd = None
+    tmp_path = None
+    try:
+        fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), suffix=".tmp")
+        os.write(fd, content.encode("utf-8"))
+        os.fsync(fd)
+        os.close(fd)
+        fd = None
+        os.replace(tmp_path, str(path))
+        tmp_path = None
+    finally:
+        if fd is not None:
+            os.close(fd)
+        if tmp_path is not None:
+            try:
+                os.unlink(tmp_path)
+            except OSError:
+                pass
+
 app = FastAPI()
 
 # Track active bash processes so they can be killed on cancel
@@ -128,10 +179,123 @@ class EditReq(BaseModel):
     old_str: str
     new_str: str
     replace_all: bool = False
+    mode: str = "replace"
 
 class ExistsReq(BaseModel):
     path: str
 
+# ── Fuzzy matching & edit utilities (embedded) ──
+
+UNICODE_MAP = {
+    "\\u2013": "-", "\\u2014": "-", "\\u2212": "-",
+    "\\u2018": "'", "\\u2019": "'",
+    "\\u201c": \'"\', "\\u201d": \'"\',
+    "\\u00a0": " ", "\\u2003": " ", "\\u2002": " ",
+    "\\u200b": "", "\\ufeff": "",
+}
+
+def _normalize_unicode(s):
+    return "".join(UNICODE_MAP.get(c, c) for c in s)
+
+def _fuzzy_find_original(content, pattern):
+    """Find the original text in content that matches pattern fuzzily."""
+    if pattern in content:
+        return pattern, None
+    # Pass 2: right-trim
+    c_lines = content.split("\\n")
+    c_rt = "\\n".join(l.rstrip() for l in c_lines)
+    p_rt = "\\n".join(l.rstrip() for l in pattern.split("\\n"))
+    if p_rt in c_rt:
+        idx = c_rt.index(p_rt)
+        start_line = c_rt[:idx].count("\\n")
+        n_lines = p_rt.count("\\n") + 1
+        matched = "\\n".join(c_lines[start_line:start_line + n_lines])
+        return matched, "(matched after trimming trailing whitespace)"
+    # Pass 3: both-sides trim
+    c_st = "\\n".join(l.strip() for l in c_lines)
+    p_st = "\\n".join(l.strip() for l in pattern.split("\\n"))
+    if p_st in c_st:
+        idx = c_st.index(p_st)
+        start_line = c_st[:idx].count("\\n")
+        n_lines = p_st.count("\\n") + 1
+        matched = "\\n".join(c_lines[start_line:start_line + n_lines])
+        return matched, "(matched after trimming whitespace)"
+    # Pass 4: unicode normalization
+    c_norm = _normalize_unicode(c_st)
+    p_norm = _normalize_unicode(p_st)
+    if p_norm in c_norm:
+        idx = c_norm.index(p_norm)
+        start_line = c_norm[:idx].count("\\n")
+        n_lines = p_norm.count("\\n") + 1
+        matched = "\\n".join(c_lines[start_line:start_line + n_lines])
+        return matched, "(matched after unicode normalization)"
+    return None, None
+
+def _apply_edit(content, old_str, new_str, mode="replace", replace_all=False):
+    """Apply edit. Returns (new_content, count, fuzzy_note) or raises ValueError."""
+    if mode == "replace_all":
+        replace_all = True
+        mode = "replace"
+    fuzzy_note = None
+    if old_str not in content:
+        matched, fuzzy_note = _fuzzy_find_original(content, old_str)
+        if matched is None:
+            raise ValueError("old_str not found in file.")
+        old_str = matched
+    count = content.count(old_str)
+    if mode == "replace":
+        if count > 1 and not replace_all:
+            raise ValueError(f"old_str appears {count} times. Use replace_all=true or provide more context.")
+        if replace_all:
+            return content.replace(old_str, new_str), count, fuzzy_note
+        return content.replace(old_str, new_str, 1), 1, fuzzy_note
+    elif mode == "append_after":
+        if replace_all:
+            return content.replace(old_str, old_str + new_str), count, fuzzy_note
+        idx = content.index(old_str) + len(old_str)
+        return content[:idx] + new_str + content[idx:], 1, fuzzy_note
+    elif mode == "prepend_before":
+        if replace_all:
+            return content.replace(old_str, new_str + old_str), count, fuzzy_note
+        idx = content.index(old_str)
+        return content[:idx] + new_str + content[idx:], 1, fuzzy_note
+    raise ValueError(f"Unknown mode: {mode}")
+
+def _validate_python(content, path=""):
+    """Lightweight Python validation. Returns list of warning strings."""
+    import ast as _ast, importlib as _il
+    warnings = []
+    try:
+        tree = _ast.parse(content)
+    except SyntaxError as e:
+        warnings.append(f"Python syntax error at line {e.lineno}: {e.msg}")
+        return warnings
+    for node in _ast.walk(tree):
+        if isinstance(node, _ast.ImportFrom) and node.module:
+            try:
+                mod = _il.import_module(node.module)
+                for alias in node.names:
+                    if alias.name != "*" and not hasattr(mod, alias.name):
+                        warnings.append(f"Import warning: '{alias.name}' not found in '{node.module}' (line {node.lineno})")
+            except ImportError as e:
+                warnings.append(f"Import error: {e} (line {node.lineno})")
+            except Exception:
+                pass
+        elif isinstance(node, _ast.Import):
+            for alias in node.names:
+                try:
+                    _il.import_module(alias.name)
+                except ImportError as e:
+                    warnings.append(f"Import error: {e} (line {node.lineno})")
+                except Exception:
+                    pass
+    if any(kw in content for kw in ("TrainingArguments", "SFTConfig", "DPOConfig", "GRPOConfig")):
+        if "push_to_hub" not in content:
+            warnings.append("Training script warning: no \'push_to_hub\' found")
+        if "hub_model_id" not in content:
+            warnings.append("Training script warning: no \'hub_model_id\' found")
+    return warnings
+
 @app.get("/api/health")
 def health():
     return {"status": "ok"}
@@ -147,9 +311,8 @@ def bash(req: BashReq):
     _active_procs[proc.pid] = proc
     try:
         stdout, stderr = proc.communicate(timeout=req.timeout)
-        output = stdout + stderr
-        if len(output) > 30000:
-            output = output[:30000] + "\\n... (truncated)"
+        output = _strip_ansi(stdout + stderr)
+        output = _truncate_output(output)
         return {"success": proc.returncode == 0, "output": output, "error": "" if proc.returncode == 0 else f"Exit code {proc.returncode}"}
     except subprocess.TimeoutExpired:
         try:
@@ -203,9 +366,13 @@ def read(req: ReadReq):
 def write(req: WriteReq):
     try:
         p = pathlib.Path(req.path)
-        p.parent.mkdir(parents=True, exist_ok=True)
-        p.write_text(req.content)
-        return {"success": True, "output": f"Wrote {len(req.content)} bytes to {req.path}", "error": ""}
+        _atomic_write(p, req.content)
+        msg = f"Wrote {len(req.content)} bytes to {req.path}"
+        if p.suffix == ".py":
+            warnings = _validate_python(req.content, req.path)
+            if warnings:
+                msg += "\\n\\nValidation warnings:\\n" + "\\n".join(f"  ! {w}" for w in warnings)
+        return {"success": True, "output": msg, "error": ""}
     except Exception as e:
         return {"success": False, "output": "", "error": str(e)}
 
@@ -216,16 +383,23 @@ def edit(req: EditReq):
         if not p.exists():
             return {"success": False, "output": "", "error": f"File not found: {req.path}"}
         content = p.read_text()
-        if req.old_str not in content:
-            return {"success": False, "output": "", "error": f"old_str not found in {req.path}"}
-        if not req.replace_all and content.count(req.old_str) > 1:
-            return {"success": False, "output": "", "error": f"old_str appears {content.count(req.old_str)} times. Use replace_all=true or provide more context."}
-        if req.replace_all:
-            new_content = content.replace(req.old_str, req.new_str)
-        else:
-            new_content = content.replace(req.old_str, req.new_str, 1)
-        p.write_text(new_content)
-        return {"success": True, "output": f"Edited {req.path}", "error": ""}
+        if req.old_str == req.new_str:
+            return {"success": False, "output": "", "error": "old_str and new_str must differ."}
+        try:
+            new_content, count, fuzzy_note = _apply_edit(
+                content, req.old_str, req.new_str, mode=req.mode, replace_all=req.replace_all
+            )
+        except ValueError as e:
+            return {"success": False, "output": "", "error": str(e)}
+        _atomic_write(p, new_content)
+        msg = f"Edited {req.path} ({count} replacement{'s' if count > 1 else ''})"
+        if fuzzy_note:
+            msg += f" {fuzzy_note}"
+        if p.suffix == ".py":
+            warnings = _validate_python(new_content, req.path)
+            if warnings:
+                msg += "\\n\\nValidation warnings:\\n" + "\\n".join(f"  ! {w}" for w in warnings)
+        return {"success": True, "output": msg, "error": ""}
     except Exception as e:
         return {"success": False, "output": "", "error": str(e)}
 
@@ -605,7 +779,8 @@ class Sandbox:
         return result
 
     def edit(
-        self, path: str, old_str: str, new_str: str, *, replace_all: bool = False
+        self, path: str, old_str: str, new_str: str, *, replace_all: bool = False,
+        mode: str = "replace",
     ) -> ToolResult:
         if old_str == new_str:
             return ToolResult(success=False, error="old_str and new_str are identical.")
@@ -621,6 +796,7 @@ class Sandbox:
                 "old_str": old_str,
                 "new_str": new_str,
                 "replace_all": replace_all,
+                "mode": mode,
             },
         )
 
@@ -731,7 +907,12 @@ class Sandbox:
            },
            "edit": {
                "description": (
-                    "Targeted edit via exact string replacement.\n"
+                    "Targeted edit via string replacement with fuzzy matching fallback.\n"
+                    "\n"
+                    "Modes:\n"
+                    "- replace (default): replace first occurrence of old_str with new_str.\n"
+                    "- append_after: insert new_str immediately after old_str (old_str is kept).\n"
+                    "- prepend_before: insert new_str immediately before old_str (old_str is kept).\n"
                    "\n"
                    "Rules:\n"
                    "- old_str must appear EXACTLY once (unless replace_all is true).\n"
@@ -742,7 +923,9 @@ class Sandbox:
                    "- File MUST have been read this session (system enforced).\n"
                    "- Do NOT include line number prefixes in old_str/new_str.\n"
                    "\n"
-                    "Use replace_all=true for batch operations like variable renaming."
+                    "If exact match fails, the tool automatically tries trimmed/normalized matching.\n"
+                    "Use replace_all=true for batch operations like variable renaming.\n"
+                    "Use append_after/prepend_before to insert code without replacing existing code."
                ),
                "parameters": {
                    "type": "object",
@@ -755,14 +938,20 @@ class Sandbox:
                        },
                        "old_str": {
                            "type": "string",
-                            "description": "Exact text to find (must differ from new_str).",
+                            "description": "Text to find (fuzzy matching used as fallback).",
                        },
-                        "new_str": {"type": "string", "description": "Replacement text."},
+                        "new_str": {"type": "string", "description": "Replacement text (or text to insert for append_after/prepend_before)."},
                        "replace_all": {
                            "type": "boolean",
                            "description": "Replace all occurrences (default: false).",
                            "default": False,
                        },
+                        "mode": {
+                            "type": "string",
+                            "enum": ["replace", "append_after", "prepend_before"],
+                            "description": "Edit mode (default: replace).",
+                            "default": "replace",
+                        },
                    },
                },
            },
@@ -791,6 +980,7 @@ class Sandbox:
                a["old_str"],
                a["new_str"],
                replace_all=a.get("replace_all", False),
+                mode=a.get("mode", "replace"),
            ),
        }
        fn = dispatch.get(name)
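
For context, a hypothetical agent-side call exercising the new mode parameter
on the extended edit tool (the file name and strings below are made up; sandbox
stands for an instance of the Sandbox class changed above):

    result = sandbox.edit(
        "train.py",
        old_str="trainer.train()",
        new_str="\ntrainer.push_to_hub()",   # inserted after old_str, which is kept
        mode="append_after",
    )
    # On success the sandbox server reports a message like
    # "Edited train.py (1 replacement)", plus Python validation warnings
    # if the edited .py file has syntax or import issues.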