File size: 3,372 Bytes
"""grep_codebase tool — ripgrep-style search inside the ingested repo.
Uses Python's `re` so we don't depend on rg being installed; that lets the
tool run identically in tests, in the local sandbox, and on AMD Cloud.
"""
from __future__ import annotations
import os
import re
from pathlib import Path
from typing import List
from .base import ToolResult, ToolSpec
# Directory names pruned from the repository walk; their contents are never searched.
SKIP_DIRS = {
    ".git",
    "node_modules",
    ".venv",
    "venv",
    "__pycache__",
    "target",
    "build",
    "dist",
}
def make_tool(
    repo_root: str | Path,
    max_matches: int = 200,
    max_file_size: int = 2_000_000,
) -> ToolSpec:
    """Build the ``grep_codebase`` tool bound to *repo_root*.

    Args:
        repo_root: Directory the tool searches; resolved once when the tool
            is created.
        max_matches: Hard ceiling on matches returned per call, regardless of
            the ``max_results`` the caller asks for.
        max_file_size: Files larger than this many bytes are skipped.

    Returns:
        A ``ToolSpec`` whose runner performs a line-by-line regex search.
    """
    root = Path(repo_root).resolve()

    def run(
        pattern: str,
        path: str = "",
        case_sensitive: bool = False,
        max_results: int = 50,
    ) -> ToolResult:
        """Search files under the repo for lines matching *pattern*.

        Args:
            pattern: Regular expression in Python ``re`` syntax, applied to
                each line separately.
            path: Sub-path (file or directory) to restrict the search to,
                relative to the repo root. Empty means the whole repo.
            case_sensitive: When false, the pattern is compiled with
                ``re.IGNORECASE``.
            max_results: Requested number of matches; the effective limit is
                ``min(max_results, max_matches)``.

        Returns:
            A ``ToolResult``. On success ``output`` holds one
            ``relative/path:line_no: text`` entry per match (or
            ``"(no matches)"``) and ``extra`` carries ``matches`` and
            ``capped``. ``capped`` is true whenever the limit was reached, so
            further matches may exist. Invalid arguments yield ``ok=False``
            with an ``error`` message.
        """
        # A non-positive limit used to report "(no matches)" without searching
        # anything, and None crashed in min(); reject both explicitly.
        if not isinstance(max_results, int) or max_results < 1:
            return ToolResult(
                ok=False,
                output="",
                error=f"max_results must be a positive integer: {max_results!r}",
            )

        try:
            rx = re.compile(pattern, 0 if case_sensitive else re.IGNORECASE)
        except re.error as e:
            return ToolResult(ok=False, output="", error=f"invalid regex: {e}")

        # Resolve before the containment check so "..", absolute paths and
        # symlinks cannot move the search scope outside the repo.
        scope = (root / path).resolve() if path else root
        try:
            scope.relative_to(root)
        except ValueError:
            return ToolResult(ok=False, output="", error=f"path outside repo: {path}")
        if not scope.exists():
            return ToolResult(ok=False, output="", error=f"not found: {path}")

        cap = min(max_results, max_matches)
        hits: List[str] = []

        def candidates():
            """Yield the files to search, in a deterministic order."""
            if scope.is_file():
                yield scope
                return
            for dirpath, dirnames, filenames in os.walk(scope):
                # Assigning through the slice prunes os.walk's own list, so
                # skipped directories are never descended into. Sorting makes
                # the output (and which hits survive the cap) reproducible
                # across filesystems.
                dirnames[:] = sorted(d for d in dirnames if d not in SKIP_DIRS)
                for fn in sorted(filenames):
                    yield Path(dirpath) / fn

        def search_file(filepath: Path) -> None:
            """Append matching lines of *filepath* to ``hits``, up to ``cap``."""
            try:
                if filepath.is_symlink():
                    # A repo can contain links pointing anywhere on disk;
                    # relative_to raises ValueError for targets outside root.
                    filepath.resolve().relative_to(root)
                if filepath.stat().st_size > max_file_size:
                    return
                text = filepath.read_text(encoding="utf-8", errors="replace")
            except (OSError, RuntimeError, ValueError):
                # Unreadable, dangling/looping link, or link escaping the repo:
                # skip the file, the search is best-effort.
                return

            # A NUL near the start marks the file as binary; matching it would
            # only emit undecodable garbage lines.
            if "\x00" in text[:8192]:
                return

            rel = str(filepath.relative_to(root))
            for ln, line in enumerate(text.split("\n"), start=1):
                if rx.search(line):
                    hits.append(f"{rel}:{ln}: {line.rstrip()}")
                    if len(hits) >= cap:
                        return

        for filepath in candidates():
            if len(hits) >= cap:
                break
            search_file(filepath)

        if not hits:
            return ToolResult(
                ok=True,
                output="(no matches)",
                extra={"matches": 0, "capped": False},
            )
        return ToolResult(
            ok=True,
            output="\n".join(hits),
            extra={"matches": len(hits), "capped": len(hits) >= cap},
        )

    return ToolSpec(
        name="grep_codebase",
        description="Search regular expression across files in the ingested repo. Returns path:line:match.",
        parameters={
            "type": "object",
            "properties": {
                "pattern": {"type": "string"},
                "path": {
                    "type": "string",
                    "description": "Limit search to this subpath. Empty = whole repo.",
                    "default": "",
                },
                "case_sensitive": {"type": "boolean", "default": False},
                "max_results": {"type": "integer", "default": 50},
            },
            "required": ["pattern"],
        },
        runner=run,
    )
|