akseljoonas HF Staff committed on
Commit
6d4f619
·
1 Parent(s): d4b1495

Deduplicate file re-reads via content hashing for local and sandbox tools

Browse files
agent/core/session.py CHANGED
@@ -12,6 +12,7 @@ from typing import Any, Optional
12
 
13
  from agent.config import Config
14
  from agent.context_manager.manager import ContextManager
 
15
 
16
  logger = logging.getLogger(__name__)
17
 
@@ -105,6 +106,8 @@ class Session:
105
  self.sandbox = None
106
  self._running_job_ids: set[str] = set() # HF job IDs currently executing
107
 
 
 
108
  # Session trajectory logging
109
  self.logged_events: list[dict] = []
110
  self.session_start_time = datetime.now().isoformat()
 
12
 
13
  from agent.config import Config
14
  from agent.context_manager.manager import ContextManager
15
+ from agent.tools.file_content_cache import FileContentCache
16
 
17
  logger = logging.getLogger(__name__)
18
 
 
106
  self.sandbox = None
107
  self._running_job_ids: set[str] = set() # HF job IDs currently executing
108
 
109
+ self.file_content_cache = FileContentCache()
110
+
111
  # Session trajectory logging
112
  self.logged_events: list[dict] = []
113
  self.session_start_time = datetime.now().isoformat()
agent/tools/file_content_cache.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cache for detecting unchanged file re-reads (local and sandbox)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import hashlib
6
+
7
+
8
def _short_hash(content: str) -> str:
    """Return the first 16 hex characters of the SHA-256 digest of *content*.

    The text is encoded as UTF-8 before hashing. The ``surrogatepass`` error
    handler is used so that strings containing lone surrogate code points
    (which the default strict handler rejects with ``UnicodeEncodeError``)
    still produce a digest instead of raising. For well-formed text the
    encoded bytes, and therefore the digest, are identical to a plain
    ``content.encode()``.
    """
    encoded = content.encode("utf-8", errors="surrogatepass")
    return hashlib.sha256(encoded).hexdigest()[:16]
10
+
11
+
12
def _resolve(path: str) -> str:
    """Return the resolved form of *path* as a string.

    Resolution is best-effort: if building or resolving the ``Path`` raises
    any ``Exception``, the argument is handed back untouched.
    """
    from pathlib import Path

    try:
        resolved = str(Path(path).resolve())
    except Exception:
        # Fall back to the raw value rather than failing the caller.
        return path
    return resolved
18
+
19
+
20
class FileContentCache:
    """Remember a content hash per path so an identical re-read can be detected.

    Each entry maps a resolved path key to ``(content_hash, turn)``, where
    ``turn`` is the value passed to the most recent ``record_read`` call for
    that path.
    """

    def __init__(self) -> None:
        # resolved path key -> (short content hash, turn given to record_read)
        self._cache: dict[str, tuple[str, int]] = {}

    def record_read(self, path: str, content: str, turn: int) -> None:
        """Store the hash of *content* together with *turn* for *path*.

        Any existing entry for the same resolved path is overwritten.
        """
        entry_key = _resolve(path)
        digest = _short_hash(content)
        self._cache[entry_key] = (digest, turn)

    def check_unchanged(self, path: str, content: str) -> tuple[bool, int | None]:
        """Compare *content* against the hash recorded for *path*.

        Returns:
            ``(False, None)`` when nothing is recorded for the path; otherwise
            ``(matches, turn)`` where ``matches`` tells whether the hash of
            *content* equals the stored hash and ``turn`` is the stored turn.
        """
        entry = self._cache.get(_resolve(path))
        if entry is None:
            return False, None
        stored_hash, recorded_turn = entry
        matches = _short_hash(content) == stored_hash
        return matches, recorded_turn

    def clear_path(self, path: str) -> None:
        """Drop the entry for *path*; a path with no entry is ignored."""
        self._cache.pop(_resolve(path), None)
agent/tools/local_tools.py CHANGED
@@ -119,10 +119,23 @@ async def _read_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
119
  if p.is_dir():
120
  return "Cannot read a directory. Use bash with 'ls' instead.", False
121
  try:
122
- lines = p.read_text().splitlines()
123
  except Exception as e:
124
  return f"read error: {e}", False
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
  offset = max((args.get("offset") or 1), 1)
127
  limit = args.get("limit") or DEFAULT_READ_LINES
128
 
@@ -132,6 +145,12 @@ async def _read_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
132
  if len(line) > MAX_LINE_LENGTH:
133
  line = line[:MAX_LINE_LENGTH] + "..."
134
  numbered.append(f"{i:>6}\t{line}")
 
 
 
 
 
 
135
  return "\n".join(numbered), True
136
 
137
 
@@ -143,6 +162,9 @@ async def _write_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
143
  p = Path(file_path)
144
  try:
145
  _atomic_write(p, content)
 
 
 
146
  msg = f"Wrote {len(content)} bytes to {file_path}"
147
  # Syntax validation for Python files
148
  if p.suffix == ".py":
@@ -190,6 +212,10 @@ async def _edit_handler(args: dict[str, Any], **_kw) -> tuple[str, bool]:
190
  except Exception as e:
191
  return f"edit write error: {e}", False
192
 
 
 
 
 
193
  msg = f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})"
194
  if fuzzy_note:
195
  msg += f" {fuzzy_note}"
 
119
  if p.is_dir():
120
  return "Cannot read a directory. Use bash with 'ls' instead.", False
121
  try:
122
+ raw_content = p.read_text()
123
  except Exception as e:
124
  return f"read error: {e}", False
125
 
126
+ # Check if file is unchanged since last read
127
+ session = _kw.get("session")
128
+ if session is not None:
129
+ is_unchanged, last_turn = session.file_content_cache.check_unchanged(
130
+ file_path, raw_content
131
+ )
132
+ if is_unchanged:
133
+ return (
134
+ f"[File unchanged since turn {last_turn}, "
135
+ f"content already in context.]"
136
+ ), True
137
+
138
+ lines = raw_content.splitlines()
139
  offset = max((args.get("offset") or 1), 1)
140
  limit = args.get("limit") or DEFAULT_READ_LINES
141
 
 
145
  if len(line) > MAX_LINE_LENGTH:
146
  line = line[:MAX_LINE_LENGTH] + "..."
147
  numbered.append(f"{i:>6}\t{line}")
148
+
149
+ if session is not None:
150
+ session.file_content_cache.record_read(
151
+ file_path, raw_content, session.turn_count
152
+ )
153
+
154
  return "\n".join(numbered), True
155
 
156
 
 
162
  p = Path(file_path)
163
  try:
164
  _atomic_write(p, content)
165
+ session = _kw.get("session")
166
+ if session is not None:
167
+ session.file_content_cache.clear_path(file_path)
168
  msg = f"Wrote {len(content)} bytes to {file_path}"
169
  # Syntax validation for Python files
170
  if p.suffix == ".py":
 
212
  except Exception as e:
213
  return f"edit write error: {e}", False
214
 
215
+ session = _kw.get("session")
216
+ if session is not None:
217
+ session.file_content_cache.clear_path(file_path)
218
+
219
  msg = f"Edited {file_path} ({replacements} replacement{'s' if replacements > 1 else ''})"
220
  if fuzzy_note:
221
  msg += f" {fuzzy_note}"
agent/tools/sandbox_tool.py CHANGED
@@ -244,7 +244,27 @@ def _make_tool_handler(sandbox_tool_name: str):
244
  try:
245
  result = await asyncio.to_thread(sb.call_tool, sandbox_tool_name, args)
246
  if result.success:
247
- return result.output or "(no output)", True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  else:
249
  error_msg = result.error or "Unknown error"
250
  output = result.output
 
244
  try:
245
  result = await asyncio.to_thread(sb.call_tool, sandbox_tool_name, args)
246
  if result.success:
247
+ output = result.output or "(no output)"
248
+ cache = getattr(session, "file_content_cache", None)
249
+ file_path = args.get("path", "")
250
+
251
+ if sandbox_tool_name == "read" and cache and file_path:
252
+ is_unchanged, last_turn = cache.check_unchanged(
253
+ f"sandbox:{file_path}", output
254
+ )
255
+ if is_unchanged:
256
+ return (
257
+ f"[File unchanged since turn {last_turn}, "
258
+ f"content already in context.]"
259
+ ), True
260
+ cache.record_read(
261
+ f"sandbox:{file_path}", output, session.turn_count
262
+ )
263
+
264
+ if sandbox_tool_name in ("write", "edit") and cache and file_path:
265
+ cache.clear_path(f"sandbox:{file_path}")
266
+
267
+ return output, True
268
  else:
269
  error_msg = result.error or "Unknown error"
270
  output = result.output