Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
Commit ·
0e5c34b
1
Parent(s): 263e229
Deduplicate file re-reads via content hashing for local and sandbox tools
Browse files- agent/core/session.py +3 -0
- agent/tools/file_content_cache.py +40 -0
- agent/tools/local_tools.py +27 -1
- agent/tools/sandbox_tool.py +21 -1
agent/core/session.py
CHANGED
|
@@ -12,6 +12,7 @@ from typing import Any, Optional
|
|
| 12 |
|
| 13 |
from agent.config import Config
|
| 14 |
from agent.context_manager.manager import ContextManager
|
|
|
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
|
@@ -105,6 +106,8 @@ class Session:
|
|
| 105 |
self.sandbox = None
|
| 106 |
self._running_job_ids: set[str] = set() # HF job IDs currently executing
|
| 107 |
|
|
|
|
|
|
|
| 108 |
# Session trajectory logging
|
| 109 |
self.logged_events: list[dict] = []
|
| 110 |
self.session_start_time = datetime.now().isoformat()
|
|
|
|
| 12 |
|
| 13 |
from agent.config import Config
|
| 14 |
from agent.context_manager.manager import ContextManager
|
| 15 |
+
from agent.tools.file_content_cache import FileContentCache
|
| 16 |
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
|
|
|
|
| 106 |
self.sandbox = None
|
| 107 |
self._running_job_ids: set[str] = set() # HF job IDs currently executing
|
| 108 |
|
| 109 |
+
self.file_content_cache = FileContentCache()
|
| 110 |
+
|
| 111 |
# Session trajectory logging
|
| 112 |
self.logged_events: list[dict] = []
|
| 113 |
self.session_start_time = datetime.now().isoformat()
|
agent/tools/file_content_cache.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Cache for detecting unchanged local file re-reads."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
import hashlib
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
def _short_hash(content: str) -> str:
|
| 9 |
+
return hashlib.sha256(content.encode()).hexdigest()[:16]
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def _resolve(path: str) -> str:
|
| 13 |
+
try:
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
return str(Path(path).resolve())
|
| 16 |
+
except Exception:
|
| 17 |
+
return path
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class FileContentCache:
    """Tracks file content hashes to skip re-reading unchanged files.

    Entries are keyed by resolved path; each holds a short content hash
    and the turn number on which the file was last read.
    """

    def __init__(self) -> None:
        # resolved path -> (short content hash, turn of last read)
        self._cache: dict[str, tuple[str, int]] = {}

    def record_read(self, path: str, content: str, turn: int) -> None:
        """Remember the hash of *content* as read from *path* on *turn*."""
        self._cache[_resolve(path)] = (_short_hash(content), turn)

    def check_unchanged(self, path: str, content: str) -> tuple[bool, int | None]:
        """Report whether *content* matches the last recorded read of *path*.

        Returns ``(False, None)`` when the path was never recorded;
        otherwise ``(hashes_match, turn_of_last_read)``.
        """
        entry = self._cache.get(_resolve(path))
        if entry is None:
            return False, None
        known_hash, last_turn = entry
        return _short_hash(content) == known_hash, last_turn

    def clear_path(self, path: str) -> None:
        """Drop any cached entry for *path* (e.g. after a write or edit)."""
        self._cache.pop(_resolve(path), None)
|
agent/tools/local_tools.py
CHANGED
|
@@ -119,10 +119,23 @@ async def _read_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
|
| 119 |
if p.is_dir():
|
| 120 |
return "Cannot read a directory. Use bash with 'ls' instead.", False
|
| 121 |
try:
|
| 122 |
-
|
| 123 |
except Exception as e:
|
| 124 |
return f"read error: {e}", False
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
offset = max((args.get("offset") or 1), 1)
|
| 127 |
limit = args.get("limit") or DEFAULT_READ_LINES
|
| 128 |
|
|
@@ -132,6 +145,12 @@ async def _read_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
|
| 132 |
if len(line) > MAX_LINE_LENGTH:
|
| 133 |
line = line[:MAX_LINE_LENGTH] + "..."
|
| 134 |
numbered.append(f"{i:>6}\t{line}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 135 |
return "\n".join(numbered), True
|
| 136 |
|
| 137 |
|
|
@@ -143,6 +162,9 @@ async def _write_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
|
| 143 |
p = Path(file_path)
|
| 144 |
try:
|
| 145 |
_atomic_write(p, content)
|
|
|
|
|
|
|
|
|
|
| 146 |
msg = f"Wrote {len(content)} bytes to {file_path}"
|
| 147 |
# Syntax validation for Python files
|
| 148 |
if p.suffix == ".py":
|
|
@@ -190,6 +212,10 @@ async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
|
|
| 190 |
except Exception as e:
|
| 191 |
return f"edit write error: {e}", False
|
| 192 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
msg = f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})"
|
| 194 |
if fuzzy_note:
|
| 195 |
msg += f" {fuzzy_note}"
|
|
|
|
| 119 |
if p.is_dir():
|
| 120 |
return "Cannot read a directory. Use bash with 'ls' instead.", False
|
| 121 |
try:
|
| 122 |
+
raw_content = p.read_text()
|
| 123 |
except Exception as e:
|
| 124 |
return f"read error: {e}", False
|
| 125 |
|
| 126 |
+
# Check if file is unchanged since last read
|
| 127 |
+
session = _kw.get("session")
|
| 128 |
+
if session is not None:
|
| 129 |
+
is_unchanged, last_turn = session.file_content_cache.check_unchanged(
|
| 130 |
+
file_path, raw_content
|
| 131 |
+
)
|
| 132 |
+
if is_unchanged:
|
| 133 |
+
return (
|
| 134 |
+
f"[File unchanged since turn {last_turn}, "
|
| 135 |
+
f"content already in context.]"
|
| 136 |
+
), True
|
| 137 |
+
|
| 138 |
+
lines = raw_content.splitlines()
|
| 139 |
offset = max((args.get("offset") or 1), 1)
|
| 140 |
limit = args.get("limit") or DEFAULT_READ_LINES
|
| 141 |
|
|
|
|
| 145 |
if len(line) > MAX_LINE_LENGTH:
|
| 146 |
line = line[:MAX_LINE_LENGTH] + "..."
|
| 147 |
numbered.append(f"{i:>6}\t{line}")
|
| 148 |
+
|
| 149 |
+
if session is not None:
|
| 150 |
+
session.file_content_cache.record_read(
|
| 151 |
+
file_path, raw_content, session.turn_count
|
| 152 |
+
)
|
| 153 |
+
|
| 154 |
return "\n".join(numbered), True
|
| 155 |
|
| 156 |
|
|
|
|
| 162 |
p = Path(file_path)
|
| 163 |
try:
|
| 164 |
_atomic_write(p, content)
|
| 165 |
+
session = _kw.get("session")
|
| 166 |
+
if session is not None:
|
| 167 |
+
session.file_content_cache.clear_path(file_path)
|
| 168 |
msg = f"Wrote {len(content)} bytes to {file_path}"
|
| 169 |
# Syntax validation for Python files
|
| 170 |
if p.suffix == ".py":
|
|
|
|
| 212 |
except Exception as e:
|
| 213 |
return f"edit write error: {e}", False
|
| 214 |
|
| 215 |
+
session = _kw.get("session")
|
| 216 |
+
if session is not None:
|
| 217 |
+
session.file_content_cache.clear_path(file_path)
|
| 218 |
+
|
| 219 |
msg = f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})"
|
| 220 |
if fuzzy_note:
|
| 221 |
msg += f" {fuzzy_note}"
|
agent/tools/sandbox_tool.py
CHANGED
|
@@ -244,7 +244,27 @@ def _make_tool_handler(sandbox_tool_name: str):
|
|
| 244 |
try:
|
| 245 |
result = await asyncio.to_thread(sb.call_tool, sandbox_tool_name, args)
|
| 246 |
if result.success:
|
| 247 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 248 |
else:
|
| 249 |
error_msg = result.error or "Unknown error"
|
| 250 |
output = result.output
|
|
|
|
| 244 |
try:
|
| 245 |
result = await asyncio.to_thread(sb.call_tool, sandbox_tool_name, args)
|
| 246 |
if result.success:
|
| 247 |
+
output = result.output or "(no output)"
|
| 248 |
+
cache = getattr(session, "file_content_cache", None)
|
| 249 |
+
file_path = args.get("path", "")
|
| 250 |
+
|
| 251 |
+
if sandbox_tool_name == "read" and cache and file_path:
|
| 252 |
+
is_unchanged, last_turn = cache.check_unchanged(
|
| 253 |
+
f"sandbox:{file_path}", output
|
| 254 |
+
)
|
| 255 |
+
if is_unchanged:
|
| 256 |
+
return (
|
| 257 |
+
f"[File unchanged since turn {last_turn}, "
|
| 258 |
+
f"content already in context.]"
|
| 259 |
+
), True
|
| 260 |
+
cache.record_read(
|
| 261 |
+
f"sandbox:{file_path}", output, session.turn_count
|
| 262 |
+
)
|
| 263 |
+
|
| 264 |
+
if sandbox_tool_name in ("write", "edit") and cache and file_path:
|
| 265 |
+
cache.clear_path(f"sandbox:{file_path}")
|
| 266 |
+
|
| 267 |
+
return output, True
|
| 268 |
else:
|
| 269 |
error_msg = result.error or "Unknown error"
|
| 270 |
output = result.output
|