"""Tests for RepoCrawler: URL validation, local scanning, and Windows clone quirks."""

from pathlib import Path
from unittest.mock import patch

import pytest
from git import GitCommandError

from app.config import Settings
from app.services.repo_crawler import RepoCrawler, validate_github_url


def test_validate_github_url_normalizes_clone_url():
    """A bare GitHub project URL is normalized to its ``.git`` clone form."""
    assert validate_github_url("https://github.com/example/project") == "https://github.com/example/project.git"


def test_validate_github_url_rejects_non_github():
    """Hosts other than github.com are rejected with ValueError."""
    with pytest.raises(ValueError):
        validate_github_url("https://gitlab.com/example/project")


def test_scan_local_repo_filters_supported_files(tmp_path: Path):
    """Only supported source files survive the scan; everything else counts as skipped."""
    (tmp_path / "node_modules").mkdir()
    (tmp_path / "node_modules" / "ignored.js").write_text("x", encoding="utf-8")
    (tmp_path / "app.py").write_text("API_KEY = '1234567890'\n", encoding="utf-8")
    (tmp_path / "notes.txt").write_text("hello", encoding="utf-8")

    crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1))
    result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)

    assert [entry.path for entry in result.files] == ["app.py"]
    # ignored.js (inside node_modules) and notes.txt (unsupported) are both skipped.
    assert result.skipped_files == 2


def test_scan_local_repo_includes_readme_for_docs_agent(tmp_path: Path):
    """README.md is included in the scan and tagged with the Markdown language."""
    (tmp_path / "README.md").write_text("# Demo\n", encoding="utf-8")

    crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1))
    result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)

    readme = result.files[0]
    assert readme.path == "README.md"
    assert readme.language == "Markdown"


def test_clone_and_scan_omits_gitpython_timeout_on_windows(tmp_path: Path):
    """On Windows (os.name == "nt") clone_from must get no kill_after_timeout and blank proxy env vars."""
    settings = Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones"))
    crawler = RepoCrawler(settings)

    # NOTE: the inner scan_local_repo call is intentionally evaluated inside the
    # `with` statement, after the os.name / clone_from patches are already active.
    with patch("app.services.repo_crawler.os.name", "nt"), patch(
        "app.services.repo_crawler.Repo.clone_from"
    ) as clone_from, patch.object(
        crawler,
        "scan_local_repo",
        return_value=crawler.scan_local_repo("https://github.com/example/project", tmp_path),
    ):
        crawler.clone_and_scan("https://github.com/example/project")

    clone_kwargs = clone_from.call_args.kwargs
    assert "kill_after_timeout" not in clone_kwargs
    assert clone_kwargs["env"]["HTTPS_PROXY"] == ""
    assert clone_kwargs["env"]["ALL_PROXY"] == ""


def test_clone_and_scan_retries_schannel_failure_with_openssl(tmp_path: Path):
    """A schannel credential failure during clone triggers the OpenSSL fallback path."""
    settings = Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones"))
    crawler = RepoCrawler(settings)
    schannel_error = GitCommandError("git clone", 128, stderr="schannel: AcquireCredentialsHandle failed")

    with patch("app.services.repo_crawler.os.name", "nt"), patch(
        "app.services.repo_crawler.Repo.clone_from",
        side_effect=schannel_error,
    ), patch.object(crawler, "_clone_repo_with_openssl") as clone_with_openssl, patch.object(
        crawler,
        "scan_local_repo",
        return_value=crawler.scan_local_repo("https://github.com/example/project", tmp_path),
    ):
        crawler.clone_and_scan("https://github.com/example/project")

    clone_with_openssl.assert_called_once()