File size: 3,724 Bytes
a3ecd30
7a376ec
a3ecd30
 
7a376ec
a3ecd30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e9cb33
 
 
 
 
 
 
 
 
 
7a376ec
 
9237011
 
 
 
 
 
 
 
 
 
 
7a376ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
from pathlib import Path
from unittest.mock import patch

import pytest
from git import GitCommandError

from app.config import Settings
from app.services.repo_crawler import RepoCrawler, validate_github_url


def test_validate_github_url_normalizes_clone_url():
    """A bare GitHub repository URL is normalized to a ``.git`` clone URL."""
    normalized = validate_github_url("https://github.com/example/project")
    assert normalized == "https://github.com/example/project.git"


def test_validate_github_url_rejects_non_github():
    """URLs hosted anywhere other than github.com are refused with ValueError."""
    non_github_url = "https://gitlab.com/example/project"
    with pytest.raises(ValueError):
        validate_github_url(non_github_url)


def test_scan_local_repo_filters_supported_files(tmp_path: Path):
    """Only supported source files are kept; vendored and unsupported files count as skipped."""
    vendored_dir = tmp_path / "node_modules"
    vendored_dir.mkdir()
    (vendored_dir / "ignored.js").write_text("x", encoding="utf-8")
    (tmp_path / "app.py").write_text("API_KEY = '1234567890'\n", encoding="utf-8")
    (tmp_path / "notes.txt").write_text("hello", encoding="utf-8")

    crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1))
    result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)

    collected_paths = [entry.path for entry in result.files]
    assert collected_paths == ["app.py"]
    # The vendored .js file and the unsupported .txt file are both skipped.
    assert result.skipped_files == 2


def test_scan_local_repo_includes_readme_for_docs_agent(tmp_path: Path):
    """README.md is collected and tagged with the Markdown language label."""
    readme = tmp_path / "README.md"
    readme.write_text("# Demo\n", encoding="utf-8")

    settings = Settings(max_files=10, max_file_size_kb=1)
    result = RepoCrawler(settings).scan_local_repo("https://github.com/example/project", tmp_path)

    first_file = result.files[0]
    assert first_file.path == "README.md"
    assert first_file.language == "Markdown"


def test_scan_local_repo_includes_dependency_manifests(tmp_path: Path):
    """Dependency manifests are scanned and labelled with manifest-specific languages."""
    manifests = {
        "requirements.txt": "requests==2.28.0\n",
        "package.json": '{"dependencies": {"express": "4.18.2"}}',
    }
    for filename, contents in manifests.items():
        (tmp_path / filename).write_text(contents, encoding="utf-8")

    crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=10))
    result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)

    assert {entry.path for entry in result.files} == {"requirements.txt", "package.json"}
    assert {entry.language for entry in result.files} == {"Python Requirements", "Node Package"}


def test_clone_and_scan_omits_gitpython_timeout_on_windows(tmp_path: Path):
    """On Windows (os.name == 'nt') clone_from gets no kill_after_timeout and runs with proxies disabled."""
    settings = Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones"))
    crawler = RepoCrawler(settings)

    patch_os_name = patch("app.services.repo_crawler.os.name", "nt")
    patch_clone = patch("app.services.repo_crawler.Repo.clone_from")
    # NOTE: the real scan_local_repo call below runs after os.name/clone_from
    # are already patched — `with` evaluates each manager expression in order.
    with patch_os_name, patch_clone as clone_from, patch.object(
        crawler,
        "scan_local_repo",
        return_value=crawler.scan_local_repo("https://github.com/example/project", tmp_path),
    ):
        crawler.clone_and_scan("https://github.com/example/project")

    clone_kwargs = clone_from.call_args.kwargs
    assert "kill_after_timeout" not in clone_kwargs
    assert clone_kwargs["env"]["HTTPS_PROXY"] == ""
    assert clone_kwargs["env"]["ALL_PROXY"] == ""


def test_clone_and_scan_retries_schannel_failure_with_openssl(tmp_path: Path):
    """A schannel credential failure during clone triggers the OpenSSL fallback path."""
    settings = Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones"))
    crawler = RepoCrawler(settings)
    schannel_error = GitCommandError("git clone", 128, stderr="schannel: AcquireCredentialsHandle failed")

    patch_os_name = patch("app.services.repo_crawler.os.name", "nt")
    patch_clone = patch("app.services.repo_crawler.Repo.clone_from", side_effect=schannel_error)
    # NOTE: the real scan_local_repo call below runs after the earlier managers
    # are entered — `with` evaluates each manager expression in order.
    with patch_os_name, patch_clone, patch.object(
        crawler, "_clone_repo_with_openssl"
    ) as clone_with_openssl, patch.object(
        crawler,
        "scan_local_repo",
        return_value=crawler.scan_local_repo("https://github.com/example/project", tmp_path),
    ):
        crawler.clone_and_scan("https://github.com/example/project")

    clone_with_openssl.assert_called_once()