Spaces:
Running on CPU Upgrade
Running on CPU Upgrade
| """ | |
| GitHub Read File Tool | |
| Reads file contents from a GitHub repository with line range support. | |
| """ | |
| import asyncio | |
| import base64 | |
| import os | |
| from dataclasses import asdict, dataclass | |
| from typing import Any, Dict, Optional, Tuple | |
| try: | |
| import requests | |
| except ImportError: | |
| raise ImportError( | |
| "requests library is required. Install with: pip install requests" | |
| ) | |
| from agent.tools.types import ToolResult | |
@dataclass
class FileContents:
    """File contents with metadata.

    Declared as a dataclass so that it can be built with keyword arguments
    and serialised through ``dataclasses.asdict`` in ``to_dict``.
    """

    content: str  # text of the selected line range
    sha: str  # "sha" field reported for the file by the contents API
    path: str  # path of the file inside the repository
    size: int  # "size" field reported by the contents API
    last_modified: Optional[str]  # committer date of the last commit, if known
    last_commit_sha: Optional[str]  # SHA of the last commit, if known
    line_start: int  # first returned line (1-indexed, inclusive)
    line_end: int  # last returned line (1-indexed, inclusive)
    total_lines: int  # number of lines in the whole file
    truncated: bool  # True when only a leading portion was returned
    message: Optional[str] = None  # optional human-readable note

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields of this instance as a plain dictionary."""
        return asdict(self)
class GitHubAPIError(Exception):
    """Signals that a GitHub API interaction failed.

    Carries no extra state; the human-readable reason is the exception
    message passed by the raiser.
    """
def _get_github_token() -> str:
    """Get GitHub token from environment.

    Reads ``GITHUB_TOKEN`` and strips surrounding whitespace, so a value
    exported with a trailing newline or padding does not end up inside the
    ``Authorization`` header.

    Returns:
        The non-empty token string.

    Raises:
        GitHubAPIError: If the variable is unset, empty, or whitespace-only.
    """
    token = os.environ.get("GITHUB_TOKEN", "").strip()
    if not token:
        raise GitHubAPIError(
            "GITHUB_TOKEN environment variable is required. "
            "Set it with: export GITHUB_TOKEN=your_token_here"
        )
    return token
def _fetch_raw_content(owner: str, repo: str, path: str, ref: str, token: str) -> str:
    """Download a file's text through the contents endpoint using the raw media type.

    Args:
        owner: Repository owner.
        repo: Repository name.
        path: Path of the file inside the repository.
        ref: Git reference sent as the ``ref`` query parameter.
        token: GitHub token used as a Bearer credential.

    Returns:
        The response body as text.

    Raises:
        GitHubAPIError: If the response status is anything other than 200.
    """
    endpoint = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
    raw_headers = {
        "Accept": "application/vnd.github.raw",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }
    reply = requests.get(
        endpoint, headers=raw_headers, params={"ref": ref}, timeout=30
    )
    if reply.status_code == 200:
        return reply.text
    raise GitHubAPIError(
        f"Failed to fetch raw content: HTTP {reply.status_code}"
    )
def _get_last_commit_info(
    owner: str, repo: str, path: str, ref: Optional[str], token: str
) -> Tuple[Optional[str], Optional[str]]:
    """Get last commit information for a specific file.

    This lookup is best-effort: any request failure, non-200 status, or
    unexpected payload shape yields ``None`` values instead of an exception,
    so missing commit metadata never prevents the file from being returned.

    Args:
        owner: Repository owner.
        repo: Repository name.
        path: Path of the file inside the repository.
        ref: Git reference; sent as ``sha`` unless it is empty or "HEAD".
        token: GitHub token used as a Bearer credential.

    Returns:
        A ``(commit_date, commit_sha)`` tuple; either element may be ``None``.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }
    url = f"https://api.github.com/repos/{owner}/{repo}/commits"
    params = {"path": path, "per_page": 1}
    if ref and ref != "HEAD":
        params["sha"] = ref

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code != 200:
            return None, None
        commits = response.json()
    except (requests.exceptions.RequestException, ValueError):
        # Network problems and undecodable JSON are tolerated on purpose.
        # Unlike a bare ``except``, this lets KeyboardInterrupt/SystemExit
        # propagate.
        return None, None

    if not isinstance(commits, list) or not commits:
        return None, None
    latest = commits[0]
    if not isinstance(latest, dict):
        return None, None

    # Walk the nested payload defensively: a null or missing "committer"
    # should only drop the date, not the commit SHA as well.
    git_commit = latest.get("commit")
    committer = git_commit.get("committer") if isinstance(git_commit, dict) else None
    commit_date = committer.get("date") if isinstance(committer, dict) else None
    return commit_date, latest.get("sha")
def _select_line_range(
    content: str,
    line_start: Optional[int],
    line_end: Optional[int],
    max_lines: int = 300,
) -> Tuple[str, int, int, int, bool, Optional[str]]:
    """Slice ``content`` to a 1-indexed, inclusive line range.

    Args:
        content: Full text of the file.
        line_start: First requested line, or ``None``.
        line_end: Last requested line, or ``None``.
        max_lines: Number of lines returned when no range is requested and
            the file is longer than this.

    Returns:
        ``(selected_text, line_start, line_end, total_lines, truncated, message)``
        where the returned bounds are the effective (clamped) ones.

    Raises:
        ValueError: If the effective start lies after the effective end.
    """
    lines = content.split("\n")
    # A final "\n" terminates the last line rather than starting a new one;
    # without this a 300-line file would count as 301 lines and be reported
    # as truncated.
    if len(lines) > 1 and lines[-1] == "":
        lines.pop()
    total_lines = len(lines)
    truncated = False
    message = None

    if line_start is None and line_end is None:
        line_start = 1
        if total_lines > max_lines:
            line_end = max_lines
            truncated = True
            message = (
                f"File has {total_lines} lines. Returned only the first {max_lines} lines. "
                f"To view more, use the line_start and line_end parameters."
            )
        else:
            line_end = total_lines
    else:
        # Missing bounds default to the file edges; out-of-range bounds are
        # clamped to them.
        line_start = 1 if line_start is None else max(line_start, 1)
        line_end = total_lines if line_end is None else min(line_end, total_lines)
        if line_start > line_end:
            raise ValueError(
                f"line_start ({line_start}) cannot be greater than line_end ({line_end})"
            )

    selected_content = "\n".join(lines[line_start - 1 : line_end])
    return selected_content, line_start, line_end, total_lines, truncated, message


def _fetch_file_contents(
    owner: str,
    repo: str,
    path: str,
    ref: str,
    line_start: Optional[int],
    line_end: Optional[int],
    token: str,
) -> FileContents:
    """Fetch file contents from GitHub API.

    Args:
        owner: Repository owner.
        repo: Repository name.
        path: Path of the file inside the repository.
        ref: Git reference; sent as ``ref`` unless it is empty or "HEAD".
        line_start: First line to return (1-indexed, inclusive), or ``None``.
        line_end: Last line to return (1-indexed, inclusive), or ``None``.
        token: GitHub token used as a Bearer credential.

    Returns:
        A ``FileContents`` holding the selected lines and file metadata.

    Raises:
        GitHubAPIError: If the file is missing, the path is not a file, the
            API answers with an error status, or the request itself fails.
        ValueError: If the effective line range is empty.
    """
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28",
        "Authorization": f"Bearer {token}",
    }
    url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}"
    params = {}
    if ref and ref != "HEAD":
        params["ref"] = ref

    try:
        response = requests.get(url, headers=headers, params=params, timeout=30)
        if response.status_code == 404:
            raise GitHubAPIError(
                f"File not found: {path} in {owner}/{repo} (ref: {ref})"
            )
        if response.status_code != 200:
            error_msg = f"GitHub API error (status {response.status_code})"
            try:
                error_data = response.json()
            except ValueError:
                # The error body was not JSON; keep the status-only message.
                error_data = None
            if isinstance(error_data, dict) and "message" in error_data:
                error_msg += f": {error_data['message']}"
            raise GitHubAPIError(error_msg)

        data = response.json()
        # A directory is returned as a JSON array of entries, not an object,
        # so it must be rejected before any dict access.
        if isinstance(data, list):
            raise GitHubAPIError(f"Path {path} is not a file (type: dir)")
        entry_type = data.get("type") if isinstance(data, dict) else None
        if entry_type != "file":
            raise GitHubAPIError(
                f"Path {path} is not a file (type: {entry_type})"
            )

        file_sha = data.get("sha")
        file_size = data.get("size", 0)

        # Decode content
        content_b64 = data.get("content", "")
        if content_b64:
            content_b64 = content_b64.replace("\n", "").replace(" ", "")
            content = base64.b64decode(content_b64).decode("utf-8", errors="replace")
        else:
            # No inline payload in the JSON response: fall back to the raw
            # media type.
            content = _fetch_raw_content(owner, repo, path, ref or "HEAD", token)
    except requests.exceptions.RequestException as e:
        raise GitHubAPIError(f"Failed to connect to GitHub API: {e}") from e

    # Get last commit info
    last_modified, last_commit_sha = _get_last_commit_info(
        owner, repo, path, ref, token
    )

    # Process line ranges
    (
        selected_content,
        line_start,
        line_end,
        total_lines,
        truncated,
        message,
    ) = _select_line_range(content, line_start, line_end)

    return FileContents(
        content=selected_content,
        sha=file_sha,
        path=path,
        size=file_size,
        last_modified=last_modified,
        last_commit_sha=last_commit_sha,
        line_start=line_start,
        line_end=line_end,
        total_lines=total_lines,
        truncated=truncated,
        message=message,
    )
def read_file(
    repo: str,
    path: str,
    ref: str = "HEAD",
    line_start: Optional[int] = None,
    line_end: Optional[int] = None,
) -> FileContents:
    """
    Read file contents from a GitHub repository.

    Returns raw file text plus metadata (commit SHA, last modified).
    If file is more than 300 lines and no line range is specified,
    returns only the first 300 lines with a message.

    Args:
        repo: Repository in format "owner/repo" (e.g., "huggingface/transformers")
        path: Path to file in repository (e.g., "README.md")
        ref: Git reference - branch name, tag, or commit SHA (default: "HEAD")
        line_start: Starting line number (1-indexed, inclusive)
        line_end: Ending line number (1-indexed, inclusive)

    Returns:
        FileContents object with content and metadata

    Raises:
        ValueError: If repo is not exactly "owner/repo" with both parts non-empty.
        GitHubAPIError: If the token is missing or the GitHub request fails.
    """
    owner, separator, repo_name = repo.strip().partition("/")
    # Reject "owner", "/repo", "owner/" and "a/b/c" up front: each of them
    # would otherwise produce a malformed API URL and a misleading
    # "File not found" error.
    if not separator or not owner or not repo_name or "/" in repo_name:
        raise ValueError("repo must be in format 'owner/repo'")

    token = _get_github_token()
    return _fetch_file_contents(
        owner=owner,
        repo=repo_name,
        path=path,
        ref=ref,
        line_start=line_start,
        line_end=line_end,
        token=token,
    )
async def _async_call(func, *args, **kwargs):
    """Run a blocking callable in a worker thread and return its result.

    Positional and keyword arguments are forwarded to ``func`` unchanged.
    """
    pending = asyncio.to_thread(func, *args, **kwargs)
    outcome = await pending
    return outcome
class ReadFileTool:
    """Tool for reading files from GitHub repositories."""

    @staticmethod
    def _error(message: str) -> ToolResult:
        """Build the error payload shared by every failure path."""
        return {
            "formatted": message,
            "totalResults": 0,
            "resultsShared": 0,
            "isError": True,
        }

    @staticmethod
    def _line_number(params: Dict[str, Any], key: str) -> Optional[int]:
        """Read an optional line-number parameter as an int.

        Accepts ints, integral floats and numeric strings, because tool
        arguments frequently arrive as "10" or 10.0. Anything else raises
        ``ValueError`` with a message naming the offending parameter.
        """
        value = params.get(key)
        if value is None:
            return None
        # bool is a subclass of int, but True/False are not line numbers.
        if isinstance(value, int) and not isinstance(value, bool):
            return value
        if isinstance(value, float) and value.is_integer():
            return int(value)
        if isinstance(value, str):
            try:
                return int(value.strip())
            except ValueError:
                pass
        raise ValueError(f"'{key}' must be an integer, got {value!r}")

    async def execute(self, params: Dict[str, Any]) -> ToolResult:
        """Execute read_file operation.

        Args:
            params: Tool arguments; "repo" and "path" are required, while
                "ref", "line_start" and "line_end" are optional.

        Returns:
            A result dict with the formatted text. Failures are reported
            through "isError" rather than raised.
        """
        repo = params.get("repo")
        path = params.get("path")

        if not repo or not path:
            return self._error("Error: 'repo' and 'path' parameters are required")

        # An explicit null/empty ref means "not specified", same as omitting it.
        ref = params.get("ref") or "HEAD"

        try:
            line_start = self._line_number(params, "line_start")
            line_end = self._line_number(params, "line_end")
        except ValueError as e:
            return self._error(f"Error: {e}")

        try:
            file_contents = await _async_call(
                read_file,
                repo=repo,
                path=path,
                ref=ref,
                line_start=line_start,
                line_end=line_end,
            )

            header = [
                f"**File: {file_contents.path}**",
                f"**Repo: {repo}**",
                f"**Lines:** {file_contents.line_start}-{file_contents.line_end} of {file_contents.total_lines}",
                f"**SHA:** {file_contents.sha}",
            ]
            if file_contents.last_modified:
                header.append(f"**Last modified:** {file_contents.last_modified}")

            response = "\n".join(header) + "\n"
            if file_contents.message:
                response += f"\n⚠️ {file_contents.message}\n"
            response += f"\n```\n{file_contents.content}\n```"

            return {
                "formatted": response,
                "totalResults": 1,
                "resultsShared": 1,
            }

        except GitHubAPIError as e:
            return self._error(f"GitHub API Error: {str(e)}")
        except Exception as e:
            # Boundary of the tool: report any other failure as an error result.
            return self._error(f"Error: {str(e)}")
# Tool specification: name, description and JSON-schema-style parameters
# for the read_file tool.
READ_FILE_TOOL_SPEC = {
    "name": "read_file",
    # One entry per line of the description; the empty entries produce the
    # blank lines and the trailing blank line.
    "description": "\n".join(
        [
            "Read file contents from a GitHub repository.",
            "",
            "Returns raw file text plus metadata (commit SHA, last modified).",
            "If file is more than 300 lines, returns only the first 300 lines and includes line_start and line_end indexes.",
            "Use line_start and line_end parameters to view specific line ranges.",
            "",
            "Examples:",
            "- Read README: {'repo': 'huggingface/transformers', 'path': 'README.md'}",
            "- Read specific lines: {'repo': 'huggingface/transformers', 'path': 'src/transformers/__init__.py', 'line_start': 1, 'line_end': 50}",
            "- Read from branch: {'repo': 'torvalds/linux', 'path': 'MAINTAINERS', 'ref': 'master', 'line_start': 1, 'line_end': 20}",
            "",
            "",
        ]
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "repo": dict(
                type="string",
                description="Repository in format 'owner/repo' (e.g., 'huggingface/transformers')",
            ),
            "path": dict(
                type="string",
                description="Path to file in repository (e.g., 'README.md', 'src/main.py')",
            ),
            "ref": dict(
                type="string",
                description="Git reference: branch name, tag, or commit SHA (default: 'HEAD')",
            ),
            "line_start": dict(
                type="integer",
                description="Starting line number (1-indexed, inclusive). Use to read specific range.",
            ),
            "line_end": dict(
                type="integer",
                description="Ending line number (1-indexed, inclusive). Use to read specific range.",
            ),
        },
        "required": ["repo", "path"],
    },
}
async def read_file_handler(arguments: Dict[str, Any]) -> tuple[str, bool]:
    """Adapter used by the agent tool router.

    Runs ``ReadFileTool.execute`` with ``arguments`` and returns a
    ``(text, success)`` pair, where success is the negation of the
    result's "isError" flag. Any exception is turned into an error text
    with ``False``.
    """
    try:
        outcome = await ReadFileTool().execute(arguments)
        text = outcome["formatted"]
        succeeded = not outcome.get("isError", False)
        return text, succeeded
    except Exception as exc:
        return f"Error executing read_file: {str(exc)}", False