muthuk1
/

graphrag-inference-hackathon

Model card Files Files and versions

xet

Community

muthuk1 commited on 12 days ago

Commit

e51378c

verified ·

1 Parent(s): fe766f7

Add unit tests for all 6 novelty techniques"

Browse files

Files changed (1) hide show

tests/test_novelties.py +238 -0

tests/test_novelties.py ADDED Viewed

	@@ -0,0 +1,238 @@

+"""
+Tests for GraphRAG Novelties Engine
+Run: python tests/test_novelties.py
+"""
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from graphrag.layers.novelties import (
+    PPRConfidenceScorer, TokenBudgetController, PathPruner,
+    SpreadingActivation, PolyGRouter, IncrementalGraphUpdater,
+    NoveltyEngine,
+)
+# ── Sample graph for testing ──────────────────────────
+ADJACENCY = {
+    "einstein": [("physics", 0.9), ("germany", 0.7), ("relativity", 0.95)],
+    "physics": [("einstein", 0.9), ("newton", 0.8), ("relativity", 0.85)],
+    "relativity": [("einstein", 0.95), ("physics", 0.85), ("spacetime", 0.9)],
+    "newton": [("physics", 0.8), ("gravity", 0.9), ("england", 0.7)],
+    "germany": [("einstein", 0.7), ("berlin", 0.6)],
+    "gravity": [("newton", 0.9), ("spacetime", 0.7)],
+    "spacetime": [("relativity", 0.9), ("gravity", 0.7)],
+    "england": [("newton", 0.7)],
+    "berlin": [("germany", 0.6)],
+}
+ENTITY_TO_CHUNKS = {
+    "einstein": ["c1", "c2"],
+    "relativity": ["c2", "c3"],
+    "newton": ["c4"],
+    "physics": ["c1", "c3", "c4"],
+}
+CHUNK_TEXTS = {
+    "c1": "Einstein was a physicist who developed the theory of relativity.",
+    "c2": "The theory of relativity was published by Einstein in 1905.",
+    "c3": "Relativity changed our understanding of physics and spacetime.",
+    "c4": "Newton developed classical mechanics and the law of gravity.",
+}
+# ── PPR Tests ─────────────────────────────────────────
+def test_ppr_basic():
+    scorer = PPRConfidenceScorer(damping=0.85, max_iterations=20)
+    scores = scorer.compute_ppr(ADJACENCY, ["einstein"])
+    assert "einstein" in scores
+    assert scores["einstein"] > 0
+    assert scores.get("relativity", 0) > scores.get("berlin", 0)  # closer = higher
+def test_ppr_multiple_seeds():
+    scorer = PPRConfidenceScorer()
+    scores = scorer.compute_ppr(ADJACENCY, ["einstein", "newton"])
+    assert scores.get("physics", 0) > 0  # connected to both seeds
+def test_ppr_empty():
+    scorer = PPRConfidenceScorer()
+    assert scorer.compute_ppr({}, []) == {}
+def test_ppr_context_scoring():
+    scorer = PPRConfidenceScorer()
+    ppr = scorer.compute_ppr(ADJACENCY, ["einstein"])
+    ranked = scorer.score_contexts(ppr, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
+    assert len(ranked) > 0
+    assert ranked[0][2] >= ranked[-1][2]  # sorted descending
+# ── Token Budget Tests ────────────────────────────────
+def test_budget_basic():
+    ctrl = TokenBudgetController(max_tokens=50)
+    items = [("Short text.", 0.9), ("A much longer text that takes more tokens.", 0.5)]
+    selected, stats = ctrl.prune_context(items)
+    assert stats["used_tokens"] <= 50
+    assert stats["items_selected"] <= 2
+def test_budget_all_fit():
+    ctrl = TokenBudgetController(max_tokens=10000)
+    items = [("Hello.", 0.9), ("World.", 0.8)]
+    selected, stats = ctrl.prune_context(items)
+    assert len(selected) == 2
+    assert stats["reduction_pct"] >= 0
+def test_budget_priority():
+    ctrl = TokenBudgetController(max_tokens=20)
+    items = [("Low priority text.", 0.1), ("High priority!", 0.9)]
+    selected, stats = ctrl.prune_context(items)
+    assert "High priority!" in selected[0]  # highest score first
+def test_budget_stats():
+    ctrl = TokenBudgetController(max_tokens=100)
+    items = [("a " * 200, 0.9)]  # 400 chars ≈ 100 tokens
+    _, stats = ctrl.prune_context(items)
+    assert "budget_tokens" in stats
+    assert "reduction_pct" in stats
+# ── Path Pruner Tests ─────────────────────────────────
+def test_path_find():
+    adj_with_rel = {
+        "A": [("B", "KNOWS", 0.9), ("C", "WORKS_AT", 0.5)],
+        "B": [("D", "LOCATED_IN", 0.8)],
+        "C": [("D", "PART_OF", 0.7)],
+    }
+    pruner = PathPruner()
+    paths = pruner.find_paths(adj_with_rel, "A", "D", max_depth=3)
+    assert len(paths) >= 1
+def test_path_scoring():
+    pruner = PathPruner()
+    paths = [[("A", "KNOWS", "B"), ("B", "IN", "C")]]
+    weights = {("A", "B"): 0.9, ("B", "C"): 0.8}
+    scored = pruner.score_and_prune(paths, weights, threshold=0.1)
+    assert len(scored) == 1
+    assert scored[0][1] == 0.9 * 0.8  # product of edge weights
+def test_path_serialize():
+    pruner = PathPruner()
+    scored = [([("Einstein", "DEVELOPED", "Relativity"), ("Relativity", "EXPLAINS", "Spacetime")], 0.72)]
+    text = pruner.serialize_paths(scored)
+    assert "Einstein" in text
+    assert "confidence: 0.720" in text
+# ── Spreading Activation Tests ────────────────────────
+def test_activation_basic():
+    sa = SpreadingActivation(decay_factor=0.7, max_steps=2)
+    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
+    assert acts["einstein"] == 1.0
+    assert acts.get("relativity", 0) > 0  # directly connected
+    assert acts.get("berlin", 0) < acts.get("physics", 0)  # further away
+def test_activation_ranking():
+    sa = SpreadingActivation()
+    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
+    ranked = sa.rank_contexts(acts, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
+    assert len(ranked) > 0
+    assert ranked[0][2] >= ranked[-1][2]
+def test_activation_decay():
+    sa = SpreadingActivation(decay_factor=0.5, max_steps=3)
+    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
+    # Further nodes should have lower activation
+    assert acts.get("einstein", 0) >= acts.get("berlin", 0)
+# ── PolyG Router Tests ────────────────────────────────
+def test_router_entity_centric():
+    router = PolyGRouter()
+    result = router.classify_query("What is quantum physics?")
+    assert result["query_type"] == "entity_centric"
+    assert result["use_graph"] is True
+def test_router_multi_hop():
+    router = PolyGRouter()
+    result = router.classify_query("Were Einstein and Newton of the same nationality?")
+    assert result["query_type"] == "multi_hop"
+    assert result["strategy"] == "graph_traversal"
+def test_router_comparison():
+    router = PolyGRouter()
+    result = router.classify_query("Compare the theories of Einstein and Hawking")
+    assert "multi_hop" in result["query_type"] or "comparison" in str(result["scores"])
+def test_router_summarization():
+    router = PolyGRouter()
+    result = router.classify_query("Summarize the main themes of quantum physics")
+    assert result["strategy"] == "community_summary"
+def test_router_has_fields():
+    router = PolyGRouter()
+    result = router.classify_query("test query")
+    assert "strategy" in result
+    assert "confidence" in result
+    assert "reasoning" in result
+# ── Incremental Updater Tests ─────────────────────────
+def test_updater_scope():
+    updater = IncrementalGraphUpdater()
+    adj = {"A": ["B", "C"], "B": ["D"], "C": ["E"]}
+    affected = updater.compute_affected_scope({"A"}, adj, scope_hops=2)
+    assert "A" in affected
+    assert "B" in affected
+    assert "D" in affected  # 2 hops from A
+def test_updater_plan():
+    updater = IncrementalGraphUpdater()
+    plan = updater.plan_update(
+        new_entities=[{"name": "X"}],
+        new_relations=[{"source": "X", "target": "Y"}],
+        existing_entity_count=100,
+    )
+    assert plan["new_entities"] == 1
+    assert plan["vs_full_rebuild_savings_pct"] > 90
+# ── NoveltyEngine Integration Test ───────────────────
+def test_novelty_engine():
+    engine = NoveltyEngine(token_budget=500)
+    result = engine.enhanced_retrieve(
+        query="What did Einstein discover?",
+        adjacency=ADJACENCY,
+        seed_entities=["einstein"],
+        entity_to_chunks=ENTITY_TO_CHUNKS,
+        chunk_texts=CHUNK_TEXTS,
+    )
+    assert "contexts" in result
+    assert "routing" in result
+    assert "budget_stats" in result
+    assert "technique_chain" in result
+    assert len(result["technique_chain"]) == 5
+    assert result["budget_stats"]["used_tokens"] <= 500
+if __name__ == "__main__":
+    import traceback
+    tests = [(k, v) for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)]
+    passed = failed = 0
+    for name, fn in tests:
+        try:
+            fn()
+            print(f"  ✅ {name}")
+            passed += 1
+        except Exception as e:
+            print(f"  ❌ {name}: {e}")
+            traceback.print_exc()
+            failed += 1
+    print(f"\n{'='*50}")
+    print(f"Novelty Tests: {passed} passed, {failed} failed, {passed+failed} total")
+    if failed == 0:
+        print("🎉 ALL NOVELTY TESTS PASSED!")