# File size: 8,813 Bytes

# e51378c

"""
Tests for GraphRAG Novelties Engine
Run: python tests/test_novelties.py
"""
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from graphrag.layers.novelties import (
    PPRConfidenceScorer, TokenBudgetController, PathPruner,
    SpreadingActivation, PolyGRouter, IncrementalGraphUpdater,
    NoveltyEngine,
)

# ── Sample graph for testing ──────────────────────────

# Weighted adjacency list shared by the graph-based tests below.
# Maps a node name to a list of (neighbour, weight) tuples. Every edge is
# listed under both of its endpoints with the same weight.
ADJACENCY = {
    "einstein": [("physics", 0.9), ("germany", 0.7), ("relativity", 0.95)],
    "physics": [("einstein", 0.9), ("newton", 0.8), ("relativity", 0.85)],
    "relativity": [("einstein", 0.95), ("physics", 0.85), ("spacetime", 0.9)],
    "newton": [("physics", 0.8), ("gravity", 0.9), ("england", 0.7)],
    "germany": [("einstein", 0.7), ("berlin", 0.6)],
    "gravity": [("newton", 0.9), ("spacetime", 0.7)],
    "spacetime": [("relativity", 0.9), ("gravity", 0.7)],
    "england": [("newton", 0.7)],
    "berlin": [("germany", 0.6)],
}

# Maps an entity name to the ids of the chunks associated with it.
# Only four of the graph's nodes have chunks; the ids are keys of CHUNK_TEXTS.
ENTITY_TO_CHUNKS = {
    "einstein": ["c1", "c2"],
    "relativity": ["c2", "c3"],
    "newton": ["c4"],
    "physics": ["c1", "c3", "c4"],
}

# Maps a chunk id (as used in ENTITY_TO_CHUNKS) to the chunk's text.
CHUNK_TEXTS = {
    "c1": "Einstein was a physicist who developed the theory of relativity.",
    "c2": "The theory of relativity was published by Einstein in 1905.",
    "c3": "Relativity changed our understanding of physics and spacetime.",
    "c4": "Newton developed classical mechanics and the law of gravity.",
}


# ── PPR Tests ─────────────────────────────────────────

def test_ppr_basic():
    """A single seed scores positively and a direct neighbour beats a distant node."""
    ppr = PPRConfidenceScorer(damping=0.85, max_iterations=20).compute_ppr(
        ADJACENCY, ["einstein"]
    )
    assert "einstein" in ppr
    assert ppr["einstein"] > 0
    # closer = higher: "relativity" neighbours the seed, "berlin" is two hops away
    near_score = ppr.get("relativity", 0)
    far_score = ppr.get("berlin", 0)
    assert near_score > far_score

def test_ppr_multiple_seeds():
    """With two seeds, a node adjacent to both receives a positive score."""
    seeds = ["einstein", "newton"]
    ppr = PPRConfidenceScorer().compute_ppr(ADJACENCY, seeds)
    # "physics" is connected to both seeds
    physics_score = ppr.get("physics", 0)
    assert physics_score > 0

def test_ppr_empty():
    """An empty graph with no seeds yields an empty score mapping."""
    result = PPRConfidenceScorer().compute_ppr({}, [])
    assert result == {}

def test_ppr_context_scoring():
    """Contexts scored from PPR come back non-empty and sorted by descending score.

    The score of each ranked entry is read from index 2. The whole score
    sequence is checked, not just its endpoints, so an out-of-order entry in
    the middle of the ranking is caught as well.
    """
    scorer = PPRConfidenceScorer()
    ppr = scorer.compute_ppr(ADJACENCY, ["einstein"])
    ranked = scorer.score_contexts(ppr, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
    assert len(ranked) > 0
    context_scores = [entry[2] for entry in ranked]
    assert context_scores == sorted(context_scores, reverse=True)  # sorted descending


# ── Token Budget Tests ────────────────────────────────

def test_budget_basic():
    """Pruning respects the token budget and selects no more items than supplied."""
    candidates = [
        ("Short text.", 0.9),
        ("A much longer text that takes more tokens.", 0.5),
    ]
    controller = TokenBudgetController(max_tokens=50)
    _, report = controller.prune_context(candidates)
    assert report["used_tokens"] <= 50
    assert report["items_selected"] <= 2

def test_budget_all_fit():
    """With a generous budget both items are kept and the reduction is non-negative."""
    candidates = [("Hello.", 0.9), ("World.", 0.8)]
    kept, report = TokenBudgetController(max_tokens=10000).prune_context(candidates)
    assert len(kept) == 2
    assert report["reduction_pct"] >= 0

def test_budget_priority():
    """The higher-scored item is placed first in the pruned selection."""
    candidates = [("Low priority text.", 0.1), ("High priority!", 0.9)]
    controller = TokenBudgetController(max_tokens=20)
    kept, _ = controller.prune_context(candidates)
    # highest score first
    top_item = kept[0]
    assert "High priority!" in top_item

def test_budget_stats():
    """The stats returned by pruning expose the budget and reduction fields."""
    # 400 chars ≈ 100 tokens
    oversized = [("a " * 200, 0.9)]
    controller = TokenBudgetController(max_tokens=100)
    _, report = controller.prune_context(oversized)
    for field in ("budget_tokens", "reduction_pct"):
        assert field in report


# ── Path Pruner Tests ─────────────────────────────────

def test_path_find():
    """At least one path from "A" to "D" is found within three hops."""
    # Each neighbour entry is (target, relation, weight).
    graph = {
        "A": [("B", "KNOWS", 0.9), ("C", "WORKS_AT", 0.5)],
        "B": [("D", "LOCATED_IN", 0.8)],
        "C": [("D", "PART_OF", 0.7)],
    }
    found = PathPruner().find_paths(graph, "A", "D", max_depth=3)
    assert len(found) >= 1

def test_path_scoring():
    """A two-edge path above the threshold is kept and scored as the edge-weight product.

    The score is compared with ``math.isclose`` rather than ``==`` because it
    is a floating-point product; exact equality would depend on the order
    and form of the multiplication inside the implementation.
    """
    import math

    pruner = PathPruner()
    paths = [[("A", "KNOWS", "B"), ("B", "IN", "C")]]
    weights = {("A", "B"): 0.9, ("B", "C"): 0.8}
    scored = pruner.score_and_prune(paths, weights, threshold=0.1)
    assert len(scored) == 1
    # product of edge weights
    assert math.isclose(scored[0][1], 0.9 * 0.8)

def test_path_serialize():
    """Serialized paths mention the entities and show the score to three decimals."""
    hops = [
        ("Einstein", "DEVELOPED", "Relativity"),
        ("Relativity", "EXPLAINS", "Spacetime"),
    ]
    rendered = PathPruner().serialize_paths([(hops, 0.72)])
    assert "Einstein" in rendered
    assert "confidence: 0.720" in rendered


# ── Spreading Activation Tests ────────────────────────

def test_activation_basic():
    """The seed keeps its activation, neighbours are activated, distant nodes get less."""
    spreader = SpreadingActivation(decay_factor=0.7, max_steps=2)
    levels = spreader.activate(ADJACENCY, {"einstein": 1.0})
    assert levels["einstein"] == 1.0
    # directly connected
    assert levels.get("relativity", 0) > 0
    # further away
    berlin_level = levels.get("berlin", 0)
    physics_level = levels.get("physics", 0)
    assert berlin_level < physics_level

def test_activation_ranking():
    """Ranking contexts by activation is non-empty with the first score >= the last."""
    spreader = SpreadingActivation()
    levels = spreader.activate(ADJACENCY, {"einstein": 1.0})
    ordering = spreader.rank_contexts(levels, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
    assert len(ordering) > 0
    first_entry, last_entry = ordering[0], ordering[-1]
    assert first_entry[2] >= last_entry[2]

def test_activation_decay():
    """The seed's activation is at least as high as that of a distant node."""
    levels = SpreadingActivation(decay_factor=0.5, max_steps=3).activate(
        ADJACENCY, {"einstein": 1.0}
    )
    # Further nodes should have lower activation
    seed_level = levels.get("einstein", 0)
    distant_level = levels.get("berlin", 0)
    assert seed_level >= distant_level


# ── PolyG Router Tests ────────────────────────────────

def test_router_entity_centric():
    """A "What is ..." question is classified as entity-centric and uses the graph."""
    question = "What is quantum physics?"
    decision = PolyGRouter().classify_query(question)
    assert decision["query_type"] == "entity_centric"
    assert decision["use_graph"] is True

def test_router_multi_hop():
    """A question relating two entities is routed to multi-hop graph traversal."""
    question = "Were Einstein and Newton of the same nationality?"
    decision = PolyGRouter().classify_query(question)
    assert decision["query_type"] == "multi_hop"
    assert decision["strategy"] == "graph_traversal"

def test_router_comparison():
    """A "Compare ..." query is typed multi-hop or at least mentions comparison in its scores."""
    decision = PolyGRouter().classify_query(
        "Compare the theories of Einstein and Hawking"
    )
    routed_as_multi_hop = "multi_hop" in decision["query_type"]
    # The scores are only inspected when the query type did not already match.
    assert routed_as_multi_hop or "comparison" in str(decision["scores"])

def test_router_summarization():
    """A "Summarize ..." query selects the community-summary strategy."""
    question = "Summarize the main themes of quantum physics"
    decision = PolyGRouter().classify_query(question)
    assert decision["strategy"] == "community_summary"

def test_router_has_fields():
    """Any classification result carries the strategy, confidence and reasoning keys."""
    decision = PolyGRouter().classify_query("test query")
    for field in ("strategy", "confidence", "reasoning"):
        assert field in decision


# ── Incremental Updater Tests ─────────────────────────

def test_updater_scope():
    """The two-hop affected scope of "A" includes A itself, B, and D."""
    graph = {"A": ["B", "C"], "B": ["D"], "C": ["E"]}
    scope = IncrementalGraphUpdater().compute_affected_scope(
        {"A"}, graph, scope_hops=2
    )
    # "D" is 2 hops from A (via "B")
    for node in ("A", "B", "D"):
        assert node in scope

def test_updater_plan():
    """Planning one new entity against 100 existing ones reports >90% savings."""
    added_entities = [{"name": "X"}]
    added_relations = [{"source": "X", "target": "Y"}]
    summary = IncrementalGraphUpdater().plan_update(
        new_entities=added_entities,
        new_relations=added_relations,
        existing_entity_count=100,
    )
    assert summary["new_entities"] == 1
    assert summary["vs_full_rebuild_savings_pct"] > 90


# ── NoveltyEngine Integration Test ───────────────────

def test_novelty_engine():
    """End-to-end retrieval returns all result sections, five techniques, and stays in budget."""
    outcome = NoveltyEngine(token_budget=500).enhanced_retrieve(
        query="What did Einstein discover?",
        adjacency=ADJACENCY,
        seed_entities=["einstein"],
        entity_to_chunks=ENTITY_TO_CHUNKS,
        chunk_texts=CHUNK_TEXTS,
    )
    for section in ("contexts", "routing", "budget_stats", "technique_chain"):
        assert section in outcome
    chain = outcome["technique_chain"]
    assert len(chain) == 5
    tokens_used = outcome["budget_stats"]["used_tokens"]
    assert tokens_used <= 500


def main():
    """Run every module-level ``test_*`` callable and print a summary.

    Tests are discovered from this module's globals and run in name order.
    A test counts as failed when it raises any ``Exception`` (including
    ``AssertionError``); the traceback is printed and the run continues.

    Returns:
        int: 0 when no test failed, 1 otherwise, suitable for ``sys.exit``.
    """
    import traceback

    tests = [
        (name, fn)
        for name, fn in sorted(globals().items())
        if name.startswith("test_") and callable(fn)
    ]
    passed = failed = 0
    for name, fn in tests:
        try:
            fn()
        except Exception as e:
            # Top-level boundary of the runner: report the failure and keep going.
            print(f"  ❌ {name}: {e}")
            traceback.print_exc()
            failed += 1
        else:
            print(f"  ✅ {name}")
            passed += 1
    print(f"\n{'='*50}")
    print(f"Novelty Tests: {passed} passed, {failed} failed, {passed+failed} total")
    if failed == 0:
        print("🎉 ALL NOVELTY TESTS PASSED!")
    return 1 if failed else 0


if __name__ == "__main__":
    # Propagate failures through the exit status so shells/CI can detect them.
    sys.exit(main())