# SwarmAudit — tests/test_repo_crawler.py
# From commit 7a376ec (Pranoy Mukherjee): "Fix Windows repo cloning and add docs agent".
from pathlib import Path
from unittest.mock import patch
import pytest
from git import GitCommandError
from app.config import Settings
from app.services.repo_crawler import RepoCrawler, validate_github_url
def test_validate_github_url_normalizes_clone_url():
    """A plain GitHub project URL is normalized to its ``.git`` clone form."""
    normalized = validate_github_url("https://github.com/example/project")
    assert normalized == "https://github.com/example/project.git"
def test_validate_github_url_rejects_non_github():
    """Hosts other than github.com are rejected with ValueError."""
    non_github_url = "https://gitlab.com/example/project"
    with pytest.raises(ValueError):
        validate_github_url(non_github_url)
def test_scan_local_repo_filters_supported_files(tmp_path: Path):
    """Scanning collects only supported source files.

    Vendored directories (node_modules) and unsupported extensions (.txt)
    are counted as skipped rather than included.
    """
    vendored = tmp_path / "node_modules"
    vendored.mkdir()
    (vendored / "ignored.js").write_text("x", encoding="utf-8")
    (tmp_path / "app.py").write_text("API_KEY = '1234567890'\n", encoding="utf-8")
    (tmp_path / "notes.txt").write_text("hello", encoding="utf-8")

    crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1))
    result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)

    collected_paths = [entry.path for entry in result.files]
    assert collected_paths == ["app.py"]
    assert result.skipped_files == 2
def test_scan_local_repo_includes_readme_for_docs_agent(tmp_path: Path):
    """README.md is picked up by the scan and tagged with the Markdown language."""
    (tmp_path / "README.md").write_text("# Demo\n", encoding="utf-8")

    crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1))
    result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)

    readme_entry = result.files[0]
    assert readme_entry.path == "README.md"
    assert readme_entry.language == "Markdown"
def test_clone_and_scan_omits_gitpython_timeout_on_windows(tmp_path: Path):
    """On Windows (os.name == "nt"), clone_from must not receive kill_after_timeout,
    and the proxy env vars passed to the clone subprocess are blanked out."""
    settings = Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones"))
    crawler = RepoCrawler(settings)

    with patch("app.services.repo_crawler.os.name", "nt"), patch(
        "app.services.repo_crawler.Repo.clone_from"
    ) as clone_from:
        # Baseline scan is computed while os.name is already patched, matching
        # the original evaluation order, then used to stub out the scan step.
        baseline = crawler.scan_local_repo("https://github.com/example/project", tmp_path)
        with patch.object(crawler, "scan_local_repo", return_value=baseline):
            crawler.clone_and_scan("https://github.com/example/project")

    clone_kwargs = clone_from.call_args.kwargs
    assert "kill_after_timeout" not in clone_kwargs
    assert clone_kwargs["env"]["HTTPS_PROXY"] == ""
    assert clone_kwargs["env"]["ALL_PROXY"] == ""
def test_clone_and_scan_retries_schannel_failure_with_openssl(tmp_path: Path):
    """A schannel TLS failure from git on Windows triggers the OpenSSL fallback clone."""
    settings = Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones"))
    crawler = RepoCrawler(settings)
    tls_failure = GitCommandError("git clone", 128, stderr="schannel: AcquireCredentialsHandle failed")

    with patch("app.services.repo_crawler.os.name", "nt"), patch(
        "app.services.repo_crawler.Repo.clone_from",
        side_effect=tls_failure,
    ), patch.object(crawler, "_clone_repo_with_openssl") as openssl_clone:
        # Baseline scan is evaluated under the active patches (same order as the
        # original test), then substituted for the real scan step.
        baseline = crawler.scan_local_repo("https://github.com/example/project", tmp_path)
        with patch.object(crawler, "scan_local_repo", return_value=baseline):
            crawler.clone_and_scan("https://github.com/example/project")

    openssl_clone.assert_called_once()