"""grep_codebase tool — ripgrep-style search inside the ingested repo.

Uses Python's `re` so we don't depend on rg being installed; that lets the
tool run identically in tests, in the local sandbox, and on AMD Cloud.
"""
| from __future__ import annotations |
| import os |
| import re |
| from pathlib import Path |
| from typing import List |
|
|
| from .base import ToolResult, ToolSpec |
|
|
|
|
# Directory names the search never descends into, wherever they appear in the
# tree (VCS metadata, dependency/virtualenv trees, caches, build output).
SKIP_DIRS = {
    ".git",
    "node_modules",
    ".venv",
    "venv",
    "__pycache__",
    "target",
    "build",
    "dist",
}
|
|
|
|
def make_tool(
    repo_root: str | Path,
    max_matches: int = 200,
    max_file_size: int = 2_000_000,
) -> ToolSpec:
    """Build the ``grep_codebase`` tool bound to one repository checkout.

    Args:
        repo_root: Directory to search; resolved once when the tool is built.
        max_matches: Hard upper bound on the number of matches a single call
            may return, regardless of the ``max_results`` the caller asks for.
        max_file_size: Files larger than this many bytes are skipped.

    Returns:
        A ``ToolSpec`` whose runner performs the regex search.
    """
    root = Path(repo_root).resolve()

    def run(
        pattern: str,
        path: str = "",
        case_sensitive: bool = False,
        max_results: int = 50,
    ) -> ToolResult:
        """Search files line by line for ``pattern`` and report the hits.

        Args:
            pattern: Regular expression in Python ``re`` syntax, applied to
                each line separately with ``search``.
            path: File or directory, relative to the repo root, to limit the
                search to. Empty means the whole repo.
            case_sensitive: When false the pattern is compiled with
                ``re.IGNORECASE``.
            max_results: Requested cap on returned matches; the effective cap
                is ``min(max_results, max_matches)`` and never less than 1.

        Returns:
            A ``ToolResult``. On success ``output`` holds one
            ``relpath:lineno: text`` entry per line (or ``(no matches)``) and
            ``extra`` carries the match count. ``ok`` is false for an invalid
            regex or a path that is invalid, missing, or outside the repo.
        """
        try:
            rx = re.compile(pattern, 0 if case_sensitive else re.IGNORECASE)
        except re.error as e:
            return ToolResult(ok=False, output="", error=f"invalid regex: {e}")

        # resolve() itself can fail (embedded NUL, symlink loop); report that
        # as a tool error instead of letting the exception escape the runner.
        try:
            scope = (root / path).resolve() if path else root
        except (OSError, RuntimeError, ValueError):
            return ToolResult(ok=False, output="", error=f"invalid path: {path}")
        try:
            scope.relative_to(root)
        except ValueError:
            return ToolResult(ok=False, output="", error=f"path outside repo: {path}")
        if not scope.exists():
            return ToolResult(ok=False, output="", error=f"not found: {path}")

        # A non-positive cap would stop before reading anything and report a
        # misleading "(no matches)", so always allow at least one result.
        cap = max(1, min(max_results, max_matches))

        def inside_repo(filepath: Path) -> bool:
            """Return False for a symlink whose target lies outside the repo."""
            if not filepath.is_symlink():
                return True
            try:
                filepath.resolve().relative_to(root)
            except (OSError, RuntimeError, ValueError):
                return False
            return True

        def candidate_files():
            """Yield the files to search, in a deterministic order."""
            if scope.is_file():
                yield scope
                return
            for dirpath, dirnames, filenames in os.walk(scope):
                # Assigning in place prunes the walk; sorting makes the order
                # (and so which hits survive the cap) independent of the
                # filesystem's directory listing order.
                dirnames[:] = sorted(d for d in dirnames if d not in SKIP_DIRS)
                for fn in sorted(filenames):
                    filepath = Path(dirpath) / fn
                    if inside_repo(filepath):
                        yield filepath

        def matches_in(filepath: Path):
            """Yield formatted hits for one file; skip it if oversized or unreadable."""
            try:
                if filepath.stat().st_size > max_file_size:
                    return
                text = filepath.read_text(encoding="utf-8", errors="replace")
            except OSError:
                return
            rel = filepath.relative_to(root)
            for ln, line in enumerate(text.split("\n"), start=1):
                if rx.search(line):
                    yield f"{rel}:{ln}: {line.rstrip()}"

        hits: List[str] = []
        for filepath in candidate_files():
            for hit in matches_in(filepath):
                hits.append(hit)
                if len(hits) >= cap:
                    break
            if len(hits) >= cap:
                break

        if not hits:
            return ToolResult(ok=True, output="(no matches)", extra={"matches": 0})
        return ToolResult(
            ok=True,
            output="\n".join(hits),
            extra={"matches": len(hits), "capped": len(hits) >= cap},
        )

    return ToolSpec(
        name="grep_codebase",
        description="Search regular expression across files in the ingested repo. Returns path:line:match.",
        parameters={
            "type": "object",
            "properties": {
                "pattern": {"type": "string"},
                "path": {"type": "string", "description": "Limit search to this subpath. Empty = whole repo.", "default": ""},
                "case_sensitive": {"type": "boolean", "default": False},
                "max_results": {"type": "integer", "default": 50},
            },
            "required": ["pattern"],
        },
        runner=run,
    )
|
|