Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
c85e4f2
1
Parent(s): 887da19
Improve tool output quality: ANSI stripping, tail-biased truncation, noise suppression
Browse files
- Strip ANSI escape sequences from bash/job output (sandbox, local, jobs)
- Switch to tail-biased truncation (25/75 head/tail split) so errors and
final results are preserved instead of setup noise
- Spill full output to temp file when truncated, with metadata telling
the agent how to read specific sections via offset/limit
- Add UV_NO_PROGRESS=1 and PYTHONWARNINGS env vars to suppress install
progress and deprecation warnings at source
- Add atomic file writes, fuzzy edit matching, and Python validation
to sandbox server and local tools
- agent/tools/edit_utils.py +280 -0
- agent/tools/jobs_tool.py +10 -2
- agent/tools/local_tools.py +91 -19
- agent/tools/sandbox_client.py +214 -24
agent/tools/edit_utils.py
ADDED
|
@@ -0,0 +1,280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Shared utilities for file editing tools — fuzzy matching, syntax validation,
|
| 3 |
+
and richer edit operations.
|
| 4 |
+
|
| 5 |
+
Used by both local_tools.py and the embedded sandbox server.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
# ── Unicode normalization map ────────────────────────────────────────────
|
| 11 |
+
|
| 12 |
+
UNICODE_MAP = {
|
| 13 |
+
"\u2013": "-", # en-dash
|
| 14 |
+
"\u2014": "-", # em-dash
|
| 15 |
+
"\u2212": "-", # minus sign
|
| 16 |
+
"\u2018": "'", # left single quote
|
| 17 |
+
"\u2019": "'", # right single quote
|
| 18 |
+
"\u201c": '"', # left double quote
|
| 19 |
+
"\u201d": '"', # right double quote
|
| 20 |
+
"\u00a0": " ", # non-breaking space
|
| 21 |
+
"\u2003": " ", # em space
|
| 22 |
+
"\u2002": " ", # en space
|
| 23 |
+
"\u200b": "", # zero-width space
|
| 24 |
+
"\ufeff": "", # BOM
|
| 25 |
+
}
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _normalize_unicode(s: str) -> str:
|
| 29 |
+
return "".join(UNICODE_MAP.get(c, c) for c in s)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# ── 4-pass fuzzy matching ────────────────────────────────────────────────
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def fuzzy_find(content: str, pattern: str) -> tuple[int | None, str | None]:
|
| 36 |
+
"""Find *pattern* in *content* with increasingly relaxed matching.
|
| 37 |
+
|
| 38 |
+
Returns (start_index_in_original_content, match_note) or (None, None).
|
| 39 |
+
The index always refers to the *original* content string so callers can
|
| 40 |
+
use ``content[idx : idx + len(matched_text)]`` for replacement.
|
| 41 |
+
|
| 42 |
+
Strategy (mirrors Codex):
|
| 43 |
+
1. Exact match
|
| 44 |
+
2. Right-trim each line (trailing whitespace)
|
| 45 |
+
3. Both-sides trim (all surrounding whitespace per line)
|
| 46 |
+
4. Unicode normalization on top of both-sides trim
|
| 47 |
+
"""
|
| 48 |
+
# Pass 1 — exact
|
| 49 |
+
if pattern in content:
|
| 50 |
+
return content.index(pattern), None
|
| 51 |
+
|
| 52 |
+
# Helper: build a line-stripped version *and* a mapping from stripped
|
| 53 |
+
# positions back to original positions. We need this so callers can
|
| 54 |
+
# apply the replacement on the original content, not the stripped copy.
|
| 55 |
+
|
| 56 |
+
def _build_stripped(text: str, strip_fn):
|
| 57 |
+
"""Return (stripped_text, line_start_map).
|
| 58 |
+
|
| 59 |
+
line_start_map[i] = original byte offset of the start of line i.
|
| 60 |
+
"""
|
| 61 |
+
orig_lines = text.split("\n")
|
| 62 |
+
stripped_lines = [strip_fn(l) for l in orig_lines]
|
| 63 |
+
return "\n".join(stripped_lines), orig_lines, stripped_lines
|
| 64 |
+
|
| 65 |
+
# Pass 2 — right-trim
|
| 66 |
+
c_rt, c_orig_lines, c_rt_lines = _build_stripped(content, str.rstrip)
|
| 67 |
+
p_rt = "\n".join(l.rstrip() for l in pattern.split("\n"))
|
| 68 |
+
idx = c_rt.find(p_rt)
|
| 69 |
+
if idx != -1:
|
| 70 |
+
orig_idx = _map_back(idx, c_orig_lines, c_rt_lines)
|
| 71 |
+
return orig_idx, "(matched after trimming trailing whitespace)"
|
| 72 |
+
|
| 73 |
+
# Pass 3 — both-sides trim
|
| 74 |
+
c_st, _, c_st_lines = _build_stripped(content, str.strip)
|
| 75 |
+
p_st = "\n".join(l.strip() for l in pattern.split("\n"))
|
| 76 |
+
idx = c_st.find(p_st)
|
| 77 |
+
if idx != -1:
|
| 78 |
+
orig_idx = _map_back(idx, c_orig_lines, c_st_lines)
|
| 79 |
+
return orig_idx, "(matched after trimming whitespace)"
|
| 80 |
+
|
| 81 |
+
# Pass 4 — unicode normalization + both-sides trim
|
| 82 |
+
c_norm = _normalize_unicode(c_st)
|
| 83 |
+
p_norm = _normalize_unicode(p_st)
|
| 84 |
+
idx = c_norm.find(p_norm)
|
| 85 |
+
if idx != -1:
|
| 86 |
+
orig_idx = _map_back(idx, c_orig_lines, c_st_lines)
|
| 87 |
+
return orig_idx, "(matched after unicode normalization)"
|
| 88 |
+
|
| 89 |
+
return None, None
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def _map_back(
|
| 93 |
+
stripped_idx: int,
|
| 94 |
+
orig_lines: list[str],
|
| 95 |
+
stripped_lines: list[str],
|
| 96 |
+
) -> int:
|
| 97 |
+
"""Map a character index in the stripped/joined text back to the original text."""
|
| 98 |
+
# Walk through stripped lines to find which line the index falls on
|
| 99 |
+
pos = 0
|
| 100 |
+
for i, sl in enumerate(stripped_lines):
|
| 101 |
+
line_end = pos + len(sl)
|
| 102 |
+
if stripped_idx <= line_end:
|
| 103 |
+
col_in_stripped = stripped_idx - pos
|
| 104 |
+
# Find where this stripped line's content starts in the original line
|
| 105 |
+
ol = orig_lines[i]
|
| 106 |
+
# The stripped line is a subset of the original line; find its offset
|
| 107 |
+
lstripped = len(ol) - len(ol.lstrip())
|
| 108 |
+
orig_col = lstripped + col_in_stripped
|
| 109 |
+
# Compute absolute position in original text
|
| 110 |
+
orig_pos = sum(len(orig_lines[j]) + 1 for j in range(i)) + orig_col
|
| 111 |
+
return orig_pos
|
| 112 |
+
pos = line_end + 1 # +1 for the \n
|
| 113 |
+
# Fallback: return 0 (shouldn't happen if idx is valid)
|
| 114 |
+
return 0
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def fuzzy_find_original_match(content: str, pattern: str) -> tuple[str | None, str | None]:
|
| 118 |
+
"""Find the *original* text in content that matches pattern fuzzily.
|
| 119 |
+
|
| 120 |
+
Returns (original_matched_text, match_note) or (None, None).
|
| 121 |
+
This extracts the exact substring from the original content that
|
| 122 |
+
corresponds to the fuzzy match, preserving its original whitespace/unicode.
|
| 123 |
+
"""
|
| 124 |
+
if pattern in content:
|
| 125 |
+
return pattern, None
|
| 126 |
+
|
| 127 |
+
idx, note = fuzzy_find(content, pattern)
|
| 128 |
+
if idx is None:
|
| 129 |
+
return None, None
|
| 130 |
+
|
| 131 |
+
# We need to find the original text span that corresponds to the match.
|
| 132 |
+
# The match covers len(pattern) worth of *logical* content.
|
| 133 |
+
# Count how many original lines the pattern spans.
|
| 134 |
+
pattern_lines = pattern.split("\n")
|
| 135 |
+
n_lines = len(pattern_lines)
|
| 136 |
+
|
| 137 |
+
# Find which original line the match starts on
|
| 138 |
+
orig_lines = content.split("\n")
|
| 139 |
+
char_pos = 0
|
| 140 |
+
start_line = 0
|
| 141 |
+
for i, ol in enumerate(orig_lines):
|
| 142 |
+
if char_pos + len(ol) >= idx:
|
| 143 |
+
start_line = i
|
| 144 |
+
break
|
| 145 |
+
char_pos += len(ol) + 1
|
| 146 |
+
|
| 147 |
+
end_line = min(start_line + n_lines, len(orig_lines))
|
| 148 |
+
# Extract the original lines that were matched
|
| 149 |
+
matched_lines = orig_lines[start_line:end_line]
|
| 150 |
+
original_text = "\n".join(matched_lines)
|
| 151 |
+
return original_text, note
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# ── Richer edit operations ───────────────────────────────────────────────
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def apply_edit(
    content: str,
    old_str: str,
    new_str: str,
    mode: str = "replace",
    replace_all: bool = False,
) -> tuple[str, int, str | None]:
    """Apply an edit operation to content.

    Modes:
    - replace: replace first occurrence (or all if replace_all=True)
    - replace_all: replace all occurrences (alias)
    - append_after: insert new_str after old_str
    - prepend_before: insert new_str before old_str

    If old_str is not present verbatim, ``fuzzy_find_original_match`` is used
    and the text it returns becomes the anchor.

    Returns (new_content, num_replacements, fuzzy_note).
    Raises ValueError if the mode is unknown, if old_str is empty while
    content is not, if old_str is not found, or if mode is "replace" and
    old_str occurs more than once without replace_all.
    """
    if mode == "replace_all":
        replace_all = True
        mode = "replace"

    # Reject a bad mode up front so it is not masked by a "not found" error.
    if mode not in ("replace", "append_after", "prepend_before"):
        raise ValueError(f"Unknown edit mode: {mode}. Use replace, append_after, or prepend_before.")

    # An empty anchor matches at every position of a non-empty file; with
    # replace_all it would insert new_str between every character.
    if not old_str and content:
        raise ValueError("old_str must not be empty.")

    # Try exact match first, then fuzzy
    fuzzy_note = None
    if old_str not in content:
        original_match, fuzzy_note = fuzzy_find_original_match(content, old_str)
        if original_match is None:
            raise ValueError("old_str not found in file.")
        old_str = original_match

    count = content.count(old_str)

    if mode == "replace":
        # Only plain replacement insists on a unique anchor.
        if count > 1 and not replace_all:
            raise ValueError(
                f"old_str appears {count} times. Use replace_all=true to replace all, "
                "or provide a more specific old_str."
            )
        replacement = new_str
    elif mode == "append_after":
        replacement = old_str + new_str
    else:  # prepend_before
        replacement = new_str + old_str

    if replace_all:
        return content.replace(old_str, replacement), count, fuzzy_note
    return content.replace(old_str, replacement, 1), 1, fuzzy_note
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
# ── Syntax validation (Python) ───────────────────────────────────────────
|
| 225 |
+
|
| 226 |
+
|
| 227 |
+
def validate_python(content: str, path: str = "") -> list[str]:
    """Lightweight post-write validation for Python files.

    Checks, in order: that *content* parses, that absolute imports resolve in
    the current interpreter, and a few training-script heuristics.

    *path* is accepted for caller convenience and is currently unused.

    Returns a list of warning strings (empty = all good).
    Never raises — validation failures are advisory only.
    """
    import ast
    import importlib
    import importlib.util

    warnings: list[str] = []

    # 1. Syntax check via ast.parse
    try:
        tree = ast.parse(content)
    except SyntaxError as e:
        warnings.append(f"Python syntax error at line {e.lineno}: {e.msg}")
        return warnings  # can't do import checks on broken syntax
    except (ValueError, RecursionError, MemoryError) as e:
        # e.g. embedded null bytes on older interpreters, or pathological nesting
        warnings.append(f"Python syntax error: {e}")
        return warnings

    def _is_submodule(package: str, name: str) -> bool:
        """True if ``package.name`` can be located as a module."""
        try:
            return importlib.util.find_spec(f"{package}.{name}") is not None
        except (ImportError, AttributeError, ValueError):
            return False

    # 2. Validate imports resolve
    # NOTE: import_module executes the imported module's top-level code in
    # this process. That is acceptable for installed packages but is a side
    # effect worth knowing about.
    for node in ast.walk(tree):
        if isinstance(node, ast.ImportFrom):
            # Relative imports need the package context of the file being
            # written, which is not available here, so they are not checked.
            if not node.module or node.level:
                continue
            try:
                mod = importlib.import_module(node.module)
                for alias in node.names:
                    if alias.name == "*" or hasattr(mod, alias.name):
                        continue
                    # `from pkg import submodule` is valid even when the
                    # submodule has not been imported yet.
                    if _is_submodule(node.module, alias.name):
                        continue
                    warnings.append(
                        f"Import warning: '{alias.name}' not found in '{node.module}' (line {node.lineno})"
                    )
            except ImportError as e:
                warnings.append(f"Import error: {e} (line {node.lineno})")
            except (Exception, SystemExit):
                pass  # skip non-importable modules (e.g. project-local)
        elif isinstance(node, ast.Import):
            for alias in node.names:
                try:
                    importlib.import_module(alias.name)
                except ImportError as e:
                    warnings.append(f"Import error: {e} (line {node.lineno})")
                except (Exception, SystemExit):
                    pass  # module raised (or called sys.exit) while importing

    # 3. Training script heuristics
    if any(kw in content for kw in ("TrainingArguments", "SFTConfig", "DPOConfig", "GRPOConfig")):
        if "push_to_hub" not in content:
            warnings.append(
                "Training script warning: no 'push_to_hub' found — model may be lost when job ends"
            )
        if "hub_model_id" not in content:
            warnings.append(
                "Training script warning: no 'hub_model_id' found"
            )

    return warnings
|
agent/tools/jobs_tool.py
CHANGED
|
@@ -117,11 +117,19 @@ def _filter_uv_install_output(logs: list[str]) -> list[str]:
|
|
| 117 |
return logs
|
| 118 |
|
| 119 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 120 |
_DEFAULT_ENV = {
|
| 121 |
"HF_HUB_DISABLE_PROGRESS_BARS": "1",
|
| 122 |
"TQDM_DISABLE": "1",
|
| 123 |
"TRANSFORMERS_VERBOSITY": "warning",
|
| 124 |
"HF_HUB_ENABLE_HF_TRANSFER": "1",
|
|
|
|
| 125 |
}
|
| 126 |
|
| 127 |
|
|
@@ -580,7 +588,7 @@ class HfJobsTool:
|
|
| 580 |
filtered_logs = _filter_uv_install_output(all_logs)
|
| 581 |
|
| 582 |
# Format all logs for the agent
|
| 583 |
-
log_text = "\n".join(filtered_logs) if filtered_logs else "(no logs)"
|
| 584 |
|
| 585 |
response = f"""{job_type} job completed!
|
| 586 |
|
|
@@ -657,7 +665,7 @@ class HfJobsTool:
|
|
| 657 |
"resultsShared": 0,
|
| 658 |
}
|
| 659 |
|
| 660 |
-
log_text = "\n".join(logs)
|
| 661 |
return {
|
| 662 |
"formatted": f"**Logs for {job_id}:**\n\n```\n{log_text}\n```",
|
| 663 |
"totalResults": 1,
|
|
|
|
| 117 |
return logs
|
| 118 |
|
| 119 |
|
| 120 |
+
_ANSI_RE = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]|\x1b\].*?\x07')
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def _strip_ansi(text: str) -> str:
|
| 124 |
+
return _ANSI_RE.sub('', text)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
_DEFAULT_ENV = {
|
| 128 |
"HF_HUB_DISABLE_PROGRESS_BARS": "1",
|
| 129 |
"TQDM_DISABLE": "1",
|
| 130 |
"TRANSFORMERS_VERBOSITY": "warning",
|
| 131 |
"HF_HUB_ENABLE_HF_TRANSFER": "1",
|
| 132 |
+
"UV_NO_PROGRESS": "1",
|
| 133 |
}
|
| 134 |
|
| 135 |
|
|
|
|
| 588 |
filtered_logs = _filter_uv_install_output(all_logs)
|
| 589 |
|
| 590 |
# Format all logs for the agent
|
| 591 |
+
log_text = _strip_ansi("\n".join(filtered_logs)) if filtered_logs else "(no logs)"
|
| 592 |
|
| 593 |
response = f"""{job_type} job completed!
|
| 594 |
|
|
|
|
| 665 |
"resultsShared": 0,
|
| 666 |
}
|
| 667 |
|
| 668 |
+
log_text = _strip_ansi("\n".join(logs))
|
| 669 |
return {
|
| 670 |
"formatted": f"**Logs for {job_id}:**\n\n```\n{log_text}\n```",
|
| 671 |
"totalResults": 1,
|
agent/tools/local_tools.py
CHANGED
|
@@ -8,18 +8,78 @@ subprocess/pathlib instead of going through a remote sandbox.
|
|
| 8 |
|
| 9 |
from __future__ import annotations
|
| 10 |
|
|
|
|
|
|
|
| 11 |
import subprocess
|
|
|
|
| 12 |
from pathlib import Path
|
| 13 |
from typing import Any
|
| 14 |
|
| 15 |
from agent.tools.sandbox_client import Sandbox
|
| 16 |
|
| 17 |
-
MAX_OUTPUT_CHARS =
|
| 18 |
MAX_LINE_LENGTH = 2000
|
| 19 |
DEFAULT_READ_LINES = 2000
|
| 20 |
DEFAULT_TIMEOUT = 120
|
| 21 |
MAX_TIMEOUT = 600
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
# ── Handlers ────────────────────────────────────────────────────────────
|
| 25 |
|
|
@@ -38,9 +98,8 @@ async def _bash_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
|
| 38 |
cwd=work_dir,
|
| 39 |
timeout=timeout,
|
| 40 |
)
|
| 41 |
-
output = result.stdout + result.stderr
|
| 42 |
-
|
| 43 |
-
output = output[:MAX_OUTPUT_CHARS] + "\n... (output truncated)"
|
| 44 |
if not output.strip():
|
| 45 |
output = "(no output)"
|
| 46 |
return output, result.returncode == 0
|
|
@@ -83,18 +142,27 @@ async def _write_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
|
| 83 |
return "No path provided.", False
|
| 84 |
p = Path(file_path)
|
| 85 |
try:
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
except Exception as e:
|
| 90 |
return f"write error: {e}", False
|
| 91 |
|
| 92 |
|
| 93 |
async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
|
|
|
|
|
|
| 94 |
file_path = args.get("path", "")
|
| 95 |
old_str = args.get("old_str", "")
|
| 96 |
new_str = args.get("new_str", "")
|
| 97 |
replace_all = args.get("replace_all", False)
|
|
|
|
| 98 |
|
| 99 |
if not file_path:
|
| 100 |
return "No path provided.", False
|
|
@@ -110,23 +178,27 @@ async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
|
| 110 |
except Exception as e:
|
| 111 |
return f"edit read error: {e}", False
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
"or provide a more specific old_str."
|
| 120 |
-
), False
|
| 121 |
|
| 122 |
-
new_text = text.replace(old_str, new_str) if replace_all else text.replace(old_str, new_str, 1)
|
| 123 |
try:
|
| 124 |
-
|
| 125 |
except Exception as e:
|
| 126 |
return f"edit write error: {e}", False
|
| 127 |
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
|
| 132 |
# ── Public API ──────────────────────────────────────────────────────────
|
|
|
|
| 8 |
|
| 9 |
from __future__ import annotations
|
| 10 |
|
| 11 |
+
import os
|
| 12 |
+
import re
|
| 13 |
import subprocess
|
| 14 |
+
import tempfile
|
| 15 |
from pathlib import Path
|
| 16 |
from typing import Any
|
| 17 |
|
| 18 |
from agent.tools.sandbox_client import Sandbox
|
| 19 |
|
| 20 |
+
MAX_OUTPUT_CHARS = 25_000
|
| 21 |
MAX_LINE_LENGTH = 2000
|
| 22 |
DEFAULT_READ_LINES = 2000
|
| 23 |
DEFAULT_TIMEOUT = 120
|
| 24 |
MAX_TIMEOUT = 600
|
| 25 |
|
| 26 |
+
_ANSI_RE = re.compile(r'\x1b\[[0-9;]*[a-zA-Z]|\x1b\].*?\x07')
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
def _atomic_write(path: Path, content: str) -> None:
|
| 30 |
+
"""Write file atomically via temp file + os.replace().
|
| 31 |
+
|
| 32 |
+
Ensures the file is never left in a partial/corrupted state — it's either
|
| 33 |
+
the old content or the new content, never half-written.
|
| 34 |
+
"""
|
| 35 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 36 |
+
fd = None
|
| 37 |
+
tmp_path = None
|
| 38 |
+
try:
|
| 39 |
+
fd, tmp_path = tempfile.mkstemp(dir=path.parent, suffix=".tmp")
|
| 40 |
+
os.write(fd, content.encode("utf-8"))
|
| 41 |
+
os.fsync(fd)
|
| 42 |
+
os.close(fd)
|
| 43 |
+
fd = None
|
| 44 |
+
os.replace(tmp_path, str(path))
|
| 45 |
+
tmp_path = None # successfully replaced, nothing to clean up
|
| 46 |
+
finally:
|
| 47 |
+
if fd is not None:
|
| 48 |
+
os.close(fd)
|
| 49 |
+
if tmp_path is not None:
|
| 50 |
+
try:
|
| 51 |
+
os.unlink(tmp_path)
|
| 52 |
+
except OSError:
|
| 53 |
+
pass
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def _strip_ansi(text: str) -> str:
    """Return *text* with every match of ``_ANSI_RE`` removed."""
    cleaned = _ANSI_RE.sub('', text)
    return cleaned
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _truncate_output(output: str, max_chars: int = MAX_OUTPUT_CHARS, head_ratio: float = 0.25) -> str:
    """Tail-biased truncation with temp file spillover for full output access.

    Output of at most *max_chars* characters is returned unchanged. Longer
    output is cut down to its first ``max_chars * head_ratio`` characters
    plus the remaining budget taken from the end, joined by a note stating
    how much was omitted. The full output is also written, best effort, to a
    temp file whose path is included in the note.
    """
    if len(output) <= max_chars:
        return output
    # Write full output to temp file so LLM can read specific sections
    spill_path = None
    try:
        # Explicit UTF-8 with replacement so the spill does not depend on the
        # locale encoding or fail on characters it cannot represent.
        with tempfile.NamedTemporaryFile(
            mode='w', suffix='.txt', prefix='bash_output_', delete=False,
            encoding='utf-8', errors='replace',
        ) as f:
            f.write(output)
            spill_path = f.name
    except OSError:
        pass  # spill is best effort; the truncated output is still returned
    total = len(output)
    max_chars = max(max_chars, 0)
    # Clamp so an out-of-range head_ratio cannot yield a negative budget.
    head_budget = min(max(int(max_chars * head_ratio), 0), max_chars)
    tail_budget = max_chars - head_budget
    head = output[:head_budget]
    # Slice from an explicit start: output[-0:] would be the whole string.
    tail = output[total - tail_budget:]
    omitted = total - max_chars
    meta = f"\n\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\n"
    if spill_path:
        meta += f"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\n"
    return head + meta + tail
|
| 82 |
+
|
| 83 |
|
| 84 |
# ── Handlers ────────────────────────────────────────────────────────────
|
| 85 |
|
|
|
|
| 98 |
cwd=work_dir,
|
| 99 |
timeout=timeout,
|
| 100 |
)
|
| 101 |
+
output = _strip_ansi(result.stdout + result.stderr)
|
| 102 |
+
output = _truncate_output(output)
|
|
|
|
| 103 |
if not output.strip():
|
| 104 |
output = "(no output)"
|
| 105 |
return output, result.returncode == 0
|
|
|
|
| 142 |
return "No path provided.", False
|
| 143 |
p = Path(file_path)
|
| 144 |
try:
|
| 145 |
+
_atomic_write(p, content)
|
| 146 |
+
msg = f"Wrote {len(content)} bytes to {file_path}"
|
| 147 |
+
# Syntax validation for Python files
|
| 148 |
+
if p.suffix == ".py":
|
| 149 |
+
from agent.tools.edit_utils import validate_python
|
| 150 |
+
warnings = validate_python(content, file_path)
|
| 151 |
+
if warnings:
|
| 152 |
+
msg += "\n\nValidation warnings:\n" + "\n".join(f" ⚠ {w}" for w in warnings)
|
| 153 |
+
return msg, True
|
| 154 |
except Exception as e:
|
| 155 |
return f"write error: {e}", False
|
| 156 |
|
| 157 |
|
| 158 |
async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
| 159 |
+
from agent.tools.edit_utils import apply_edit, validate_python
|
| 160 |
+
|
| 161 |
file_path = args.get("path", "")
|
| 162 |
old_str = args.get("old_str", "")
|
| 163 |
new_str = args.get("new_str", "")
|
| 164 |
replace_all = args.get("replace_all", False)
|
| 165 |
+
mode = args.get("mode", "replace")
|
| 166 |
|
| 167 |
if not file_path:
|
| 168 |
return "No path provided.", False
|
|
|
|
| 178 |
except Exception as e:
|
| 179 |
return f"edit read error: {e}", False
|
| 180 |
|
| 181 |
+
try:
|
| 182 |
+
new_text, replacements, fuzzy_note = apply_edit(
|
| 183 |
+
text, old_str, new_str, mode=mode, replace_all=replace_all
|
| 184 |
+
)
|
| 185 |
+
except ValueError as e:
|
| 186 |
+
return str(e), False
|
|
|
|
|
|
|
| 187 |
|
|
|
|
| 188 |
try:
|
| 189 |
+
_atomic_write(p, new_text)
|
| 190 |
except Exception as e:
|
| 191 |
return f"edit write error: {e}", False
|
| 192 |
|
| 193 |
+
msg = f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})"
|
| 194 |
+
if fuzzy_note:
|
| 195 |
+
msg += f" {fuzzy_note}"
|
| 196 |
+
# Syntax validation for Python files
|
| 197 |
+
if p.suffix == ".py":
|
| 198 |
+
warnings = validate_python(new_text, file_path)
|
| 199 |
+
if warnings:
|
| 200 |
+
msg += "\n\nValidation warnings:\n" + "\n".join(f" ⚠ {w}" for w in warnings)
|
| 201 |
+
return msg, True
|
| 202 |
|
| 203 |
|
| 204 |
# ── Public API ──────────────────────────────────────────────────────────
|
agent/tools/sandbox_client.py
CHANGED
|
@@ -56,7 +56,7 @@ HARDWARE_OPTIONS = [
|
|
| 56 |
"a10g-large",
|
| 57 |
"a100-large",
|
| 58 |
]
|
| 59 |
-
OUTPUT_LIMIT =
|
| 60 |
LINE_LIMIT = 2000
|
| 61 |
DEFAULT_READ_LIMIT = 2000
|
| 62 |
DEFAULT_TIMEOUT = 240
|
|
@@ -85,7 +85,9 @@ ENV HOME=/home/user \\
|
|
| 85 |
PIP_USER=1 \\
|
| 86 |
HF_HUB_DISABLE_PROGRESS_BARS=1 \\
|
| 87 |
TQDM_DISABLE=1 \\
|
| 88 |
-
HF_HUB_ENABLE_HF_TRANSFER=1
|
|
|
|
|
|
|
| 89 |
|
| 90 |
WORKDIR /app
|
| 91 |
COPY --chown=user . /app
|
|
@@ -97,12 +99,61 @@ CMD ["python", "sandbox_server.py"]
|
|
| 97 |
|
| 98 |
_SANDBOX_SERVER = '''\
|
| 99 |
"""Minimal FastAPI server for sandbox operations."""
|
| 100 |
-
import os, subprocess, pathlib, signal, threading
|
| 101 |
from fastapi import FastAPI
|
| 102 |
from pydantic import BaseModel
|
| 103 |
from typing import Optional
|
| 104 |
import uvicorn
|
| 105 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
app = FastAPI()
|
| 107 |
|
| 108 |
# Track active bash processes so they can be killed on cancel
|
|
@@ -128,10 +179,123 @@ class EditReq(BaseModel):
|
|
| 128 |
old_str: str
|
| 129 |
new_str: str
|
| 130 |
replace_all: bool = False
|
|
|
|
| 131 |
|
| 132 |
class ExistsReq(BaseModel):
|
| 133 |
path: str
|
| 134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
@app.get("/api/health")
|
| 136 |
def health():
|
| 137 |
return {"status": "ok"}
|
|
@@ -147,9 +311,8 @@ def bash(req: BashReq):
|
|
| 147 |
_active_procs[proc.pid] = proc
|
| 148 |
try:
|
| 149 |
stdout, stderr = proc.communicate(timeout=req.timeout)
|
| 150 |
-
output = stdout + stderr
|
| 151 |
-
|
| 152 |
-
output = output[:30000] + "\\n... (truncated)"
|
| 153 |
return {"success": proc.returncode == 0, "output": output, "error": "" if proc.returncode == 0 else f"Exit code {proc.returncode}"}
|
| 154 |
except subprocess.TimeoutExpired:
|
| 155 |
try:
|
|
@@ -203,9 +366,13 @@ def read(req: ReadReq):
|
|
| 203 |
def write(req: WriteReq):
|
| 204 |
try:
|
| 205 |
p = pathlib.Path(req.path)
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
except Exception as e:
|
| 210 |
return {"success": False, "output": "", "error": str(e)}
|
| 211 |
|
|
@@ -216,16 +383,23 @@ def edit(req: EditReq):
|
|
| 216 |
if not p.exists():
|
| 217 |
return {"success": False, "output": "", "error": f"File not found: {req.path}"}
|
| 218 |
content = p.read_text()
|
| 219 |
-
if req.old_str
|
| 220 |
-
return {"success": False, "output": "", "error":
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
except Exception as e:
|
| 230 |
return {"success": False, "output": "", "error": str(e)}
|
| 231 |
|
|
@@ -605,7 +779,8 @@ class Sandbox:
|
|
| 605 |
return result
|
| 606 |
|
| 607 |
def edit(
|
| 608 |
-
self, path: str, old_str: str, new_str: str, *, replace_all: bool = False
|
|
|
|
| 609 |
) -> ToolResult:
|
| 610 |
if old_str == new_str:
|
| 611 |
return ToolResult(success=False, error="old_str and new_str are identical.")
|
|
@@ -621,6 +796,7 @@ class Sandbox:
|
|
| 621 |
"old_str": old_str,
|
| 622 |
"new_str": new_str,
|
| 623 |
"replace_all": replace_all,
|
|
|
|
| 624 |
},
|
| 625 |
)
|
| 626 |
|
|
@@ -731,7 +907,12 @@ class Sandbox:
|
|
| 731 |
},
|
| 732 |
"edit": {
|
| 733 |
"description": (
|
| 734 |
-
"Targeted edit via
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 735 |
"\n"
|
| 736 |
"Rules:\n"
|
| 737 |
"- old_str must appear EXACTLY once (unless replace_all is true).\n"
|
|
@@ -742,7 +923,9 @@ class Sandbox:
|
|
| 742 |
"- File MUST have been read this session (system enforced).\n"
|
| 743 |
"- Do NOT include line number prefixes in old_str/new_str.\n"
|
| 744 |
"\n"
|
| 745 |
-
"
|
|
|
|
|
|
|
| 746 |
),
|
| 747 |
"parameters": {
|
| 748 |
"type": "object",
|
|
@@ -755,14 +938,20 @@ class Sandbox:
|
|
| 755 |
},
|
| 756 |
"old_str": {
|
| 757 |
"type": "string",
|
| 758 |
-
"description": "
|
| 759 |
},
|
| 760 |
-
"new_str": {"type": "string", "description": "Replacement text."},
|
| 761 |
"replace_all": {
|
| 762 |
"type": "boolean",
|
| 763 |
"description": "Replace all occurrences (default: false).",
|
| 764 |
"default": False,
|
| 765 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 766 |
},
|
| 767 |
},
|
| 768 |
},
|
|
@@ -791,6 +980,7 @@ class Sandbox:
|
|
| 791 |
a["old_str"],
|
| 792 |
a["new_str"],
|
| 793 |
replace_all=a.get("replace_all", False),
|
|
|
|
| 794 |
),
|
| 795 |
}
|
| 796 |
fn = dispatch.get(name)
|
|
|
|
| 56 |
"a10g-large",
|
| 57 |
"a100-large",
|
| 58 |
]
|
| 59 |
+
OUTPUT_LIMIT = 25000
|
| 60 |
LINE_LIMIT = 2000
|
| 61 |
DEFAULT_READ_LIMIT = 2000
|
| 62 |
DEFAULT_TIMEOUT = 240
|
|
|
|
| 85 |
PIP_USER=1 \\
|
| 86 |
HF_HUB_DISABLE_PROGRESS_BARS=1 \\
|
| 87 |
TQDM_DISABLE=1 \\
|
| 88 |
+
HF_HUB_ENABLE_HF_TRANSFER=1 \\
|
| 89 |
+
UV_NO_PROGRESS=1 \\
|
| 90 |
+
PYTHONWARNINGS=ignore::DeprecationWarning
|
| 91 |
|
| 92 |
WORKDIR /app
|
| 93 |
COPY --chown=user . /app
|
|
|
|
| 99 |
|
| 100 |
_SANDBOX_SERVER = '''\
|
| 101 |
"""Minimal FastAPI server for sandbox operations."""
|
| 102 |
+
import os, subprocess, pathlib, signal, threading, re, tempfile
|
| 103 |
from fastapi import FastAPI
|
| 104 |
from pydantic import BaseModel
|
| 105 |
from typing import Optional
|
| 106 |
import uvicorn
|
| 107 |
|
| 108 |
+
_ANSI_RE = re.compile(r'\\x1b\\[[0-9;]*[a-zA-Z]|\\x1b\\].*?\\x07')
|
| 109 |
+
|
| 110 |
+
def _strip_ansi(text: str) -> str:
|
| 111 |
+
return _ANSI_RE.sub('', text)
|
| 112 |
+
|
| 113 |
+
def _truncate_output(output: str, max_chars: int = 25000, head_ratio: float = 0.25) -> str:
|
| 114 |
+
if len(output) <= max_chars:
|
| 115 |
+
return output
|
| 116 |
+
# Write full output to temp file so LLM can read specific sections
|
| 117 |
+
spill_path = None
|
| 118 |
+
try:
|
| 119 |
+
with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', prefix='bash_output_', dir='/tmp', delete=False) as f:
|
| 120 |
+
f.write(output)
|
| 121 |
+
spill_path = f.name
|
| 122 |
+
except Exception:
|
| 123 |
+
pass
|
| 124 |
+
head_budget = int(max_chars * head_ratio)
|
| 125 |
+
tail_budget = max_chars - head_budget
|
| 126 |
+
head = output[:head_budget]
|
| 127 |
+
tail = output[-tail_budget:]
|
| 128 |
+
total = len(output)
|
| 129 |
+
omitted = total - max_chars
|
| 130 |
+
meta = f"\\n\\n... ({omitted:,} of {total:,} chars omitted, showing first {head_budget:,} + last {tail_budget:,}) ...\\n"
|
| 131 |
+
if spill_path:
|
| 132 |
+
meta += f"Full output saved to {spill_path} — use the read tool with offset/limit to inspect specific sections.\\n"
|
| 133 |
+
return head + meta + tail
|
| 134 |
+
|
| 135 |
+
def _atomic_write(path: pathlib.Path, content: str):
|
| 136 |
+
"""Write atomically: temp file + fsync + os.replace."""
|
| 137 |
+
path.parent.mkdir(parents=True, exist_ok=True)
|
| 138 |
+
fd = None
|
| 139 |
+
tmp_path = None
|
| 140 |
+
try:
|
| 141 |
+
fd, tmp_path = tempfile.mkstemp(dir=str(path.parent), suffix=".tmp")
|
| 142 |
+
os.write(fd, content.encode("utf-8"))
|
| 143 |
+
os.fsync(fd)
|
| 144 |
+
os.close(fd)
|
| 145 |
+
fd = None
|
| 146 |
+
os.replace(tmp_path, str(path))
|
| 147 |
+
tmp_path = None
|
| 148 |
+
finally:
|
| 149 |
+
if fd is not None:
|
| 150 |
+
os.close(fd)
|
| 151 |
+
if tmp_path is not None:
|
| 152 |
+
try:
|
| 153 |
+
os.unlink(tmp_path)
|
| 154 |
+
except OSError:
|
| 155 |
+
pass
|
| 156 |
+
|
| 157 |
app = FastAPI()
|
| 158 |
|
| 159 |
# Track active bash processes so they can be killed on cancel
|
|
|
|
| 179 |
old_str: str
|
| 180 |
new_str: str
|
| 181 |
replace_all: bool = False
|
| 182 |
+
mode: str = "replace"
|
| 183 |
|
| 184 |
class ExistsReq(BaseModel):
|
| 185 |
path: str
|
| 186 |
|
| 187 |
+
# ── Fuzzy matching & edit utilities (embedded) ──
|
| 188 |
+
|
| 189 |
+
UNICODE_MAP = {
|
| 190 |
+
"\\u2013": "-", "\\u2014": "-", "\\u2212": "-",
|
| 191 |
+
"\\u2018": "'", "\\u2019": "'",
|
| 192 |
+
"\\u201c": \'"\', "\\u201d": \'"\',
|
| 193 |
+
"\\u00a0": " ", "\\u2003": " ", "\\u2002": " ",
|
| 194 |
+
"\\u200b": "", "\\ufeff": "",
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
def _normalize_unicode(s):
|
| 198 |
+
return "".join(UNICODE_MAP.get(c, c) for c in s)
|
| 199 |
+
|
| 200 |
+
def _fuzzy_find_original(content, pattern):
|
| 201 |
+
"""Find the original text in content that matches pattern fuzzily."""
|
| 202 |
+
if pattern in content:
|
| 203 |
+
return pattern, None
|
| 204 |
+
# Pass 2: right-trim
|
| 205 |
+
c_lines = content.split("\\n")
|
| 206 |
+
c_rt = "\\n".join(l.rstrip() for l in c_lines)
|
| 207 |
+
p_rt = "\\n".join(l.rstrip() for l in pattern.split("\\n"))
|
| 208 |
+
if p_rt in c_rt:
|
| 209 |
+
idx = c_rt.index(p_rt)
|
| 210 |
+
start_line = c_rt[:idx].count("\\n")
|
| 211 |
+
n_lines = p_rt.count("\\n") + 1
|
| 212 |
+
matched = "\\n".join(c_lines[start_line:start_line + n_lines])
|
| 213 |
+
return matched, "(matched after trimming trailing whitespace)"
|
| 214 |
+
# Pass 3: both-sides trim
|
| 215 |
+
c_st = "\\n".join(l.strip() for l in c_lines)
|
| 216 |
+
p_st = "\\n".join(l.strip() for l in pattern.split("\\n"))
|
| 217 |
+
if p_st in c_st:
|
| 218 |
+
idx = c_st.index(p_st)
|
| 219 |
+
start_line = c_st[:idx].count("\\n")
|
| 220 |
+
n_lines = p_st.count("\\n") + 1
|
| 221 |
+
matched = "\\n".join(c_lines[start_line:start_line + n_lines])
|
| 222 |
+
return matched, "(matched after trimming whitespace)"
|
| 223 |
+
# Pass 4: unicode normalization
|
| 224 |
+
c_norm = _normalize_unicode(c_st)
|
| 225 |
+
p_norm = _normalize_unicode(p_st)
|
| 226 |
+
if p_norm in c_norm:
|
| 227 |
+
idx = c_norm.index(p_norm)
|
| 228 |
+
start_line = c_norm[:idx].count("\\n")
|
| 229 |
+
n_lines = p_norm.count("\\n") + 1
|
| 230 |
+
matched = "\\n".join(c_lines[start_line:start_line + n_lines])
|
| 231 |
+
return matched, "(matched after unicode normalization)"
|
| 232 |
+
return None, None
|
| 233 |
+
|
| 234 |
+
def _apply_edit(content, old_str, new_str, mode="replace", replace_all=False):
|
| 235 |
+
"""Apply edit. Returns (new_content, count, fuzzy_note) or raises ValueError."""
|
| 236 |
+
if mode == "replace_all":
|
| 237 |
+
replace_all = True
|
| 238 |
+
mode = "replace"
|
| 239 |
+
fuzzy_note = None
|
| 240 |
+
if old_str not in content:
|
| 241 |
+
matched, fuzzy_note = _fuzzy_find_original(content, old_str)
|
| 242 |
+
if matched is None:
|
| 243 |
+
raise ValueError("old_str not found in file.")
|
| 244 |
+
old_str = matched
|
| 245 |
+
count = content.count(old_str)
|
| 246 |
+
if mode == "replace":
|
| 247 |
+
if count > 1 and not replace_all:
|
| 248 |
+
raise ValueError(f"old_str appears {count} times. Use replace_all=true or provide more context.")
|
| 249 |
+
if replace_all:
|
| 250 |
+
return content.replace(old_str, new_str), count, fuzzy_note
|
| 251 |
+
return content.replace(old_str, new_str, 1), 1, fuzzy_note
|
| 252 |
+
elif mode == "append_after":
|
| 253 |
+
if replace_all:
|
| 254 |
+
return content.replace(old_str, old_str + new_str), count, fuzzy_note
|
| 255 |
+
idx = content.index(old_str) + len(old_str)
|
| 256 |
+
return content[:idx] + new_str + content[idx:], 1, fuzzy_note
|
| 257 |
+
elif mode == "prepend_before":
|
| 258 |
+
if replace_all:
|
| 259 |
+
return content.replace(old_str, new_str + old_str), count, fuzzy_note
|
| 260 |
+
idx = content.index(old_str)
|
| 261 |
+
return content[:idx] + new_str + content[idx:], 1, fuzzy_note
|
| 262 |
+
raise ValueError(f"Unknown mode: {mode}")
|
| 263 |
+
|
| 264 |
+
def _validate_python(content, path=""):
|
| 265 |
+
"""Lightweight Python validation. Returns list of warning strings."""
|
| 266 |
+
import ast as _ast, importlib as _il
|
| 267 |
+
warnings = []
|
| 268 |
+
try:
|
| 269 |
+
tree = _ast.parse(content)
|
| 270 |
+
except SyntaxError as e:
|
| 271 |
+
warnings.append(f"Python syntax error at line {e.lineno}: {e.msg}")
|
| 272 |
+
return warnings
|
| 273 |
+
for node in _ast.walk(tree):
|
| 274 |
+
if isinstance(node, _ast.ImportFrom) and node.module:
|
| 275 |
+
try:
|
| 276 |
+
mod = _il.import_module(node.module)
|
| 277 |
+
for alias in node.names:
|
| 278 |
+
if alias.name != "*" and not hasattr(mod, alias.name):
|
| 279 |
+
warnings.append(f"Import warning: '{alias.name}' not found in '{node.module}' (line {node.lineno})")
|
| 280 |
+
except ImportError as e:
|
| 281 |
+
warnings.append(f"Import error: {e} (line {node.lineno})")
|
| 282 |
+
except Exception:
|
| 283 |
+
pass
|
| 284 |
+
elif isinstance(node, _ast.Import):
|
| 285 |
+
for alias in node.names:
|
| 286 |
+
try:
|
| 287 |
+
_il.import_module(alias.name)
|
| 288 |
+
except ImportError as e:
|
| 289 |
+
warnings.append(f"Import error: {e} (line {node.lineno})")
|
| 290 |
+
except Exception:
|
| 291 |
+
pass
|
| 292 |
+
if any(kw in content for kw in ("TrainingArguments", "SFTConfig", "DPOConfig", "GRPOConfig")):
|
| 293 |
+
if "push_to_hub" not in content:
|
| 294 |
+
warnings.append("Training script warning: no \'push_to_hub\' found")
|
| 295 |
+
if "hub_model_id" not in content:
|
| 296 |
+
warnings.append("Training script warning: no \'hub_model_id\' found")
|
| 297 |
+
return warnings
|
| 298 |
+
|
| 299 |
@app.get("/api/health")
|
| 300 |
def health():
|
| 301 |
return {"status": "ok"}
|
|
|
|
| 311 |
_active_procs[proc.pid] = proc
|
| 312 |
try:
|
| 313 |
stdout, stderr = proc.communicate(timeout=req.timeout)
|
| 314 |
+
output = _strip_ansi(stdout + stderr)
|
| 315 |
+
output = _truncate_output(output)
|
|
|
|
| 316 |
return {"success": proc.returncode == 0, "output": output, "error": "" if proc.returncode == 0 else f"Exit code {proc.returncode}"}
|
| 317 |
except subprocess.TimeoutExpired:
|
| 318 |
try:
|
|
|
|
| 366 |
def write(req: WriteReq):
    """Write req.content to req.path atomically and report the result.

    The success message states the number of bytes written; the helper
    encodes the text as UTF-8, so that is the UTF-8 length and not the
    character count. For paths ending in .py the new content is validated
    and any warnings are appended to the message; warnings never make the
    write fail.

    Returns:
        A dict with "success", "output" and "error" keys. Any exception is
        reported through "error" instead of being raised.
    """
    try:
        p = pathlib.Path(req.path)
        _atomic_write(p, req.content)
        byte_count = len(req.content.encode("utf-8"))
        msg = f"Wrote {byte_count} bytes to {req.path}"
        if p.suffix == ".py":
            warnings = _validate_python(req.content, req.path)
            if warnings:
                msg += "\\n\\nValidation warnings:\\n" + "\\n".join(f"  ! {w}" for w in warnings)
        return {"success": True, "output": msg, "error": ""}
    except Exception as e:
        return {"success": False, "output": "", "error": str(e)}
|
| 378 |
|
|
|
|
| 383 |
if not p.exists():
|
| 384 |
return {"success": False, "output": "", "error": f"File not found: {req.path}"}
|
| 385 |
content = p.read_text()
|
| 386 |
+
if req.old_str == req.new_str:
|
| 387 |
+
return {"success": False, "output": "", "error": "old_str and new_str must differ."}
|
| 388 |
+
try:
|
| 389 |
+
new_content, count, fuzzy_note = _apply_edit(
|
| 390 |
+
content, req.old_str, req.new_str, mode=req.mode, replace_all=req.replace_all
|
| 391 |
+
)
|
| 392 |
+
except ValueError as e:
|
| 393 |
+
return {"success": False, "output": "", "error": str(e)}
|
| 394 |
+
_atomic_write(p, new_content)
|
| 395 |
+
msg = f"Edited {req.path} ({count} replacement{'s' if count > 1 else ''})"
|
| 396 |
+
if fuzzy_note:
|
| 397 |
+
msg += f" {fuzzy_note}"
|
| 398 |
+
if p.suffix == ".py":
|
| 399 |
+
warnings = _validate_python(new_content, req.path)
|
| 400 |
+
if warnings:
|
| 401 |
+
msg += "\\n\\nValidation warnings:\\n" + "\\n".join(f" ! {w}" for w in warnings)
|
| 402 |
+
return {"success": True, "output": msg, "error": ""}
|
| 403 |
except Exception as e:
|
| 404 |
return {"success": False, "output": "", "error": str(e)}
|
| 405 |
|
|
|
|
| 779 |
return result
|
| 780 |
|
| 781 |
def edit(
|
| 782 |
+
self, path: str, old_str: str, new_str: str, *, replace_all: bool = False,
|
| 783 |
+
mode: str = "replace",
|
| 784 |
) -> ToolResult:
|
| 785 |
if old_str == new_str:
|
| 786 |
return ToolResult(success=False, error="old_str and new_str are identical.")
|
|
|
|
| 796 |
"old_str": old_str,
|
| 797 |
"new_str": new_str,
|
| 798 |
"replace_all": replace_all,
|
| 799 |
+
"mode": mode,
|
| 800 |
},
|
| 801 |
)
|
| 802 |
|
|
|
|
| 907 |
},
|
| 908 |
"edit": {
|
| 909 |
"description": (
|
| 910 |
+
"Targeted edit via string replacement with fuzzy matching fallback.\n"
|
| 911 |
+
"\n"
|
| 912 |
+
"Modes:\n"
|
| 913 |
+
"- replace (default): replace first occurrence of old_str with new_str.\n"
|
| 914 |
+
"- append_after: insert new_str immediately after old_str (old_str is kept).\n"
|
| 915 |
+
"- prepend_before: insert new_str immediately before old_str (old_str is kept).\n"
|
| 916 |
"\n"
|
| 917 |
"Rules:\n"
|
| 918 |
"- old_str must appear EXACTLY once (unless replace_all is true).\n"
|
|
|
|
| 923 |
"- File MUST have been read this session (system enforced).\n"
|
| 924 |
"- Do NOT include line number prefixes in old_str/new_str.\n"
|
| 925 |
"\n"
|
| 926 |
+
"If exact match fails, the tool automatically tries trimmed/normalized matching.\n"
|
| 927 |
+
"Use replace_all=true for batch operations like variable renaming.\n"
|
| 928 |
+
"Use append_after/prepend_before to insert code without replacing existing code."
|
| 929 |
),
|
| 930 |
"parameters": {
|
| 931 |
"type": "object",
|
|
|
|
| 938 |
},
|
| 939 |
"old_str": {
|
| 940 |
"type": "string",
|
| 941 |
+
"description": "Text to find (fuzzy matching used as fallback).",
|
| 942 |
},
|
| 943 |
+
"new_str": {"type": "string", "description": "Replacement text (or text to insert for append_after/prepend_before)."},
|
| 944 |
"replace_all": {
|
| 945 |
"type": "boolean",
|
| 946 |
"description": "Replace all occurrences (default: false).",
|
| 947 |
"default": False,
|
| 948 |
},
|
| 949 |
+
"mode": {
|
| 950 |
+
"type": "string",
|
| 951 |
+
"enum": ["replace", "append_after", "prepend_before"],
|
| 952 |
+
"description": "Edit mode (default: replace).",
|
| 953 |
+
"default": "replace",
|
| 954 |
+
},
|
| 955 |
},
|
| 956 |
},
|
| 957 |
},
|
|
|
|
| 980 |
a["old_str"],
|
| 981 |
a["new_str"],
|
| 982 |
replace_all=a.get("replace_all", False),
|
| 983 |
+
mode=a.get("mode", "replace"),
|
| 984 |
),
|
| 985 |
}
|
| 986 |
fn = dispatch.get(name)
|