Spaces:

SouravNath
/

repomind-api

Running

File size: 18,854 Bytes

dc71cad

"""
tests/test_phase3_localisation.py
──────────────────────────────────
Unit tests for Phase 3: BM25, RRF fusion, DeBERTa ranker, and pipeline.

All tests work without OpenAI API key or GPU — components degrade gracefully.

Run with: pytest tests/test_phase3_localisation.py -v
"""
from __future__ import annotations

import textwrap
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest


# ── Helpers ────────────────────────────────────────────────────────────────────

def make_file_symbols(file_path: str, summary: str = ""):
    """Create a minimal FileSymbols for testing."""
    from ast_parser.python_parser import FileSymbols, FunctionInfo
    fs = FileSymbols(file_path=file_path, file_hash="aaa111")
    fs.module_docstring = summary
    # Also add a fake function whose name contains the summary words
    # so summary_text is fully populated
    if summary:
        fs.functions = [
            FunctionInfo(
                name=summary.split()[0] if summary.split() else "placeholder",
                qualified_name=summary.split()[0] if summary.split() else "placeholder",
                args=[], decorators=[], docstring=summary,
                start_line=1, end_line=5,
            )
        ]
    return fs


# ── BM25 tokeniser ────────────────────────────────────────────────────────────

class TestTokeniser:
    def test_lowercase(self):
        from localisation.bm25_retriever import _tokenise
        result = _tokenise("Hello World")
        assert all(t == t.lower() for t in result)

    def test_camel_case_split(self):
        from localisation.bm25_retriever import _tokenise
        result = _tokenise("QuerySet")
        assert "query" in result
        assert "set" in result

    def test_snake_case_split(self):
        from localisation.bm25_retriever import _tokenise
        result = _tokenise("get_queryset")
        assert "get" in result
        assert "queryset" in result

    def test_short_tokens_filtered(self):
        from localisation.bm25_retriever import _tokenise
        result = _tokenise("a b c def")
        assert "a" not in result
        assert "b" not in result
        assert "def" in result

    def test_path_tokenisation(self):
        from localisation.bm25_retriever import _tokenise
        result = _tokenise("django/db/models/query.py")
        assert "django" in result
        assert "models" in result
        assert "query" in result


# ── BM25 Retriever ────────────────────────────────────────────────────────────

class TestBM25Retriever:
    def test_index_and_query_basic(self):
        from localisation.bm25_retriever import BM25Retriever
        retriever = BM25Retriever()
        symbols = [
            make_file_symbols("models/user.py", "User authentication login password"),
            make_file_symbols("views/dashboard.py", "Dashboard render template"),
            make_file_symbols("utils/email.py", "Email sending SMTP"),
        ]
        retriever.index(symbols)
        hits = retriever.query("user authentication login", top_k=3)
        assert len(hits) >= 1
        assert hits[0].file_path == "models/user.py"

    def test_query_returns_positive_scores_only(self):
        from localisation.bm25_retriever import BM25Retriever
        retriever = BM25Retriever()
        symbols = [make_file_symbols(f"file{i}.py", f"content {i}") for i in range(5)]
        retriever.index(symbols)
        hits = retriever.query("content 3", top_k=5)
        assert all(h.score > 0 for h in hits)

    def test_ranks_are_sequential(self):
        from localisation.bm25_retriever import BM25Retriever
        retriever = BM25Retriever()
        symbols = [make_file_symbols(f"f{i}.py", f"word{i} text") for i in range(3)]
        retriever.index(symbols)
        hits = retriever.query("word0 text", top_k=3)
        assert [h.rank for h in hits] == list(range(1, len(hits) + 1))

    def test_empty_query_returns_empty(self):
        from localisation.bm25_retriever import BM25Retriever
        retriever = BM25Retriever()
        symbols = [make_file_symbols("a.py", "content")]
        retriever.index(symbols)
        hits = retriever.query("", top_k=5)
        assert hits == []

    def test_corpus_size(self):
        from localisation.bm25_retriever import BM25Retriever
        retriever = BM25Retriever()
        symbols = [make_file_symbols(f"f{i}.py", "text") for i in range(7)]
        retriever.index(symbols)
        assert retriever.corpus_size == 7

    def test_file_path_tokens_boost(self):
        from localisation.bm25_retriever import BM25Retriever
        # Both files have 'models' in content. But models.py ALSO has it in
        # path (doubled) — with a larger corpus that gives positive BM25 scores.
        retriever = BM25Retriever()
        symbols = [
            make_file_symbols("django/db/models.py", "handles database records"),
            make_file_symbols("utils/helper.py", "general utilities helper"),
            make_file_symbols("views/base.py", "base view rendering"),
            make_file_symbols("core/app.py", "application entry point"),
            make_file_symbols("api/serializers.py", "rest framework serializers"),
        ]
        retriever.index(symbols)
        hits = retriever.query("models", top_k=5)
        # models.py has 'models' in path (2x weight) — must appear in results
        paths = [h.file_path for h in hits]
        assert "django/db/models.py" in paths

    def test_not_indexed_raises(self):
        from localisation.bm25_retriever import BM25Retriever
        retriever = BM25Retriever()
        with pytest.raises(RuntimeError, match="not indexed"):
            retriever.query("test", top_k=5)

    def test_skips_parse_error_files(self):
        from localisation.bm25_retriever import BM25Retriever
        from ast_parser.python_parser import FileSymbols
        retriever = BM25Retriever()
        good = make_file_symbols("good.py", "good content")
        bad = FileSymbols(file_path="bad.py", file_hash="bbb", parse_error="SyntaxError")
        retriever.index([good, bad])
        assert retriever.corpus_size == 1


# ── RRF Fusion ────────────────────────────────────────────────────────────────

class TestRRFFusion:
    def test_basic_fusion(self):
        from localisation.rrf_fusion import reciprocal_rank_fusion
        bm25 = [("a.py", 1.0, 1), ("b.py", 0.8, 2), ("c.py", 0.5, 3)]
        embed = [("b.py", 0.9, 1), ("a.py", 0.7, 2), ("d.py", 0.6, 3)]
        ppr = {"a.py": 0.5, "b.py": 0.3}

        result = reciprocal_rank_fusion(bm25, embed, ppr, top_k=4)
        assert len(result) <= 4
        # a.py appears in all three → should rank high
        top_paths = [h.file_path for h in result]
        assert "a.py" in top_paths[:2]

    def test_top_k_respected(self):
        from localisation.rrf_fusion import reciprocal_rank_fusion
        bm25 = [(f"f{i}.py", 1.0, i + 1) for i in range(10)]
        result = reciprocal_rank_fusion(bm25, [], {}, top_k=3)
        assert len(result) == 3

    def test_empty_inputs(self):
        from localisation.rrf_fusion import reciprocal_rank_fusion
        result = reciprocal_rank_fusion([], [], {}, top_k=5)
        assert result == []

    def test_ranks_sequential(self):
        from localisation.rrf_fusion import reciprocal_rank_fusion
        bm25 = [("a.py", 1.0, 1), ("b.py", 0.5, 2)]
        result = reciprocal_rank_fusion(bm25, [], {}, top_k=5)
        assert [h.rank for h in result] == list(range(1, len(result) + 1))

    def test_all_sources_tracked(self):
        from localisation.rrf_fusion import reciprocal_rank_fusion
        bm25 = [("a.py", 1.0, 1)]
        embed = [("a.py", 0.9, 1)]
        ppr = {"a.py": 0.5}
        result = reciprocal_rank_fusion(bm25, embed, ppr, top_k=1)
        hit = result[0]
        assert hit.bm25_rank == 1
        assert hit.embed_rank == 1
        assert hit.ppr_rank == 1

    def test_ablation_no_ppr(self):
        from localisation.rrf_fusion import ablate
        bm25 = [("a.py", 1.0, 1)]
        ppr = {"b.py": 99.0}  # b.py has huge PPR score
        # With PPR zeroed out, b.py should NOT appear
        result = ablate(bm25, [], ppr, use_ppr=False, top_k=5)
        paths = [h.file_path for h in result]
        assert "b.py" not in paths

    def test_scores_descending(self):
        from localisation.rrf_fusion import reciprocal_rank_fusion
        bm25 = [("a.py", 1.0, 1), ("b.py", 0.5, 2), ("c.py", 0.1, 3)]
        result = reciprocal_rank_fusion(bm25, [], {}, top_k=3)
        scores = [h.fused_score for h in result]
        assert scores == sorted(scores, reverse=True)

    def test_union_of_all_lists(self):
        """File appearing only in PPR should still be in results."""
        from localisation.rrf_fusion import reciprocal_rank_fusion
        bm25 = [("a.py", 1.0, 1)]
        ppr = {"z.py": 1.0}  # only in PPR
        result = reciprocal_rank_fusion(bm25, [], ppr, top_k=10)
        paths = [h.file_path for h in result]
        assert "z.py" in paths


# ── DeBERTa Ranker — without GPU ──────────────────────────────────────────────

class TestDeBERTaRankerFallback:
    """Tests for graceful fallback when model is not loaded."""

    def test_rerank_fallback_returns_stage1_order(self):
        from localisation.deberta_ranker import DeBERTaRanker
        # Don't actually load the model
        ranker = DeBERTaRanker.__new__(DeBERTaRanker)
        ranker._available = False
        ranker._model = None
        ranker._tokenizer = None

        candidates = [("a.py", "summary a"), ("b.py", "summary b"), ("c.py", "summary c")]
        result = ranker.rerank("fix the bug", candidates, top_k=3)
        assert len(result) == 3
        assert result[0].file_path == "a.py"
        assert result[0].rank == 1

    def test_rerank_empty_candidates(self):
        from localisation.deberta_ranker import DeBERTaRanker
        ranker = DeBERTaRanker.__new__(DeBERTaRanker)
        ranker._available = False
        result = ranker.rerank("query", [], top_k=5)
        assert result == []

    def test_ranked_file_scores_are_positive(self):
        from localisation.deberta_ranker import DeBERTaRanker
        ranker = DeBERTaRanker.__new__(DeBERTaRanker)
        ranker._available = False
        candidates = [(f"f{i}.py", f"text {i}") for i in range(5)]
        result = ranker.rerank("test query", candidates, top_k=5)
        assert all(r.relevance_score > 0 for r in result)


# ── Recall metric ─────────────────────────────────────────────────────────────

class TestRecallMetric:
    def test_perfect_recall(self):
        from localisation.deberta_ranker import recall_at_k
        preds = ["a.py", "b.py", "c.py"]
        gold = ["a.py", "b.py"]
        assert recall_at_k(preds, gold, k=5) == 1.0

    def test_zero_recall(self):
        from localisation.deberta_ranker import recall_at_k
        preds = ["x.py", "y.py"]
        gold = ["a.py"]
        assert recall_at_k(preds, gold, k=5) == 0.0

    def test_partial_recall(self):
        from localisation.deberta_ranker import recall_at_k
        preds = ["a.py", "b.py", "c.py"]
        gold = ["a.py", "z.py"]
        assert recall_at_k(preds, gold, k=5) == 0.5

    def test_recall_at_k_respects_k(self):
        from localisation.deberta_ranker import recall_at_k
        preds = ["x.py", "a.py"]  # a.py is at position 2
        gold = ["a.py"]
        assert recall_at_k(preds, gold, k=1) == 0.0   # only looking at top-1
        assert recall_at_k(preds, gold, k=2) == 1.0

    def test_empty_gold(self):
        from localisation.deberta_ranker import recall_at_k
        assert recall_at_k(["a.py"], [], k=5) == 0.0


# ── Patch file extraction ─────────────────────────────────────────────────────

class TestExtractFilesFromPatch:
    def test_basic_unified_diff(self):
        from localisation.deberta_ranker import _extract_files_from_patch
        patch = textwrap.dedent("""
            diff --git a/django/db/models/query.py b/django/db/models/query.py
            --- a/django/db/models/query.py
            +++ b/django/db/models/query.py
            @@ -1 +1 @@
            -old
            +new
        """)
        files = _extract_files_from_patch(patch)
        assert "django/db/models/query.py" in files

    def test_multiple_files(self):
        from localisation.deberta_ranker import _extract_files_from_patch
        patch = textwrap.dedent("""
            --- a/foo.py
            +++ b/foo.py
            @@ -1 +1 @@ fix
            --- a/bar.py
            +++ b/bar.py
            @@ -1 +1 @@ fix
        """)
        files = _extract_files_from_patch(patch)
        assert "foo.py" in files
        assert "bar.py" in files

    def test_dev_null_excluded(self):
        from localisation.deberta_ranker import _extract_files_from_patch
        patch = "--- /dev/null\n+++ b/new_file.py\n"
        files = _extract_files_from_patch(patch)
        assert "/dev/null" not in files

    def test_empty_patch(self):
        from localisation.deberta_ranker import _extract_files_from_patch
        assert _extract_files_from_patch("") == []


# ── Failure categorisation ────────────────────────────────────────────────────

class TestFailureCategorisation:
    def test_success(self):
        from localisation.pipeline import categorise_localisation_failure
        result = categorise_localisation_failure(["a.py", "b.py"], ["a.py"], "good long detailed issue text here")
        assert result == "success"

    def test_wrong_file(self):
        from localisation.pipeline import categorise_localisation_failure
        # Long issue text (>10 words) + no gold file found → wrong_file
        long_issue = "there is a null pointer exception in the query filter method"
        result = categorise_localisation_failure(["x.py", "y.py"], ["z.py"], long_issue)
        assert result == "wrong_file"

    def test_partial_file(self):
        from localisation.pipeline import categorise_localisation_failure
        result = categorise_localisation_failure(["a.py"], ["a.py", "b.py"], "long enough issue text to be valid")
        assert result == "partial_file"

    def test_ambiguous_issue(self):
        from localisation.pipeline import categorise_localisation_failure
        result = categorise_localisation_failure(["x.py"], ["z.py"], "fix bug")  # very short
        assert result == "ambiguous_issue"


# ── Pipeline integration (no API required) ────────────────────────────────────

class TestLocalisationPipeline:
    def test_pipeline_bm25_only(self):
        from localisation.pipeline import LocalisationPipeline
        pipeline = LocalisationPipeline(
            use_embeddings=False,
            use_deberta=False,
            use_ppr=False,
        )
        symbols = [
            make_file_symbols("auth/models.py", "User model authentication password hash"),
            make_file_symbols("views/login.py", "Login view render form"),
            make_file_symbols("utils/email.py", "Email SMTP send message"),
        ]
        pipeline.index_repo(symbols)
        result = pipeline.localise("user authentication login password", top_k=3)
        assert len(result.hits) >= 1
        assert result.hits[0].file_path == "auth/models.py"

    def test_pipeline_empty_query(self):
        from localisation.pipeline import LocalisationPipeline
        pipeline = LocalisationPipeline(use_embeddings=False, use_deberta=False)
        symbols = [make_file_symbols("a.py", "content")]
        pipeline.index_repo(symbols)
        result = pipeline.localise("")
        assert result.failure_category == "empty_query"
        assert result.hits == []

    def test_pipeline_with_gold_files_computes_recall(self):
        from localisation.pipeline import LocalisationPipeline
        pipeline = LocalisationPipeline(use_embeddings=False, use_deberta=False, use_ppr=False)
        # Use a larger corpus so BM25 gives positive scores
        # 'queryset' appears in path AND content of target.py → guaranteed top-1
        symbols = [
            make_file_symbols("db/queryset.py", "queryset filter method database orm"),
            make_file_symbols("views/generic.py", "generic view rendering template"),
            make_file_symbols("utils/helper.py", "utility functions general purpose"),
            make_file_symbols("api/serializer.py", "rest framework serializer fields"),
            make_file_symbols("forms/widget.py", "html form widget rendering input"),
        ]
        pipeline.index_repo(symbols)
        result = pipeline.localise(
            "fix null pointer exception in queryset filter", top_k=5,
            gold_files=["db/queryset.py"]
        )
        assert result.recall_at_5 is not None
        assert result.recall_at_10 is not None
        assert result.recall_at_5 == 1.0  # queryset in path+content guarantees top rank

    def test_top_k_paths_property(self):
        from localisation.pipeline import LocalisationPipeline
        pipeline = LocalisationPipeline(use_embeddings=False, use_deberta=False, use_ppr=False)
        symbols = [make_file_symbols(f"f{i}.py", f"content {i}") for i in range(5)]
        pipeline.index_repo(symbols)
        result = pipeline.localise("content 1", top_k=3)
        assert len(result.top_k_paths) == len(result.hits)

    def test_hit_diagnostic_flags(self):
        from localisation.pipeline import LocalisationPipeline
        pipeline = LocalisationPipeline(use_embeddings=False, use_deberta=False, use_ppr=False)
        symbols = [make_file_symbols("a.py", "special word")]
        pipeline.index_repo(symbols)
        result = pipeline.localise("special word", top_k=1)
        if result.hits:
            hit = result.hits[0]
            assert hit.in_bm25 is True