| """ |
| Tests for GraphRAG Novelties Engine |
| Run: python tests/test_novelties.py |
| """ |
| import sys, os |
| sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) |
|
|
| from graphrag.layers.novelties import ( |
| PPRConfidenceScorer, TokenBudgetController, PathPruner, |
| SpreadingActivation, PolyGRouter, IncrementalGraphUpdater, |
| NoveltyEngine, |
| ) |
|
|
| |
|
|
# Toy knowledge graph shared by the tests below, stored as a weighted
# adjacency list: node -> [(neighbour, edge_weight), ...].
# Every edge is listed from both endpoints with the same weight.
ADJACENCY = {
    "einstein": [("physics", 0.9), ("germany", 0.7), ("relativity", 0.95)],
    "physics": [("einstein", 0.9), ("newton", 0.8), ("relativity", 0.85)],
    "relativity": [("einstein", 0.95), ("physics", 0.85), ("spacetime", 0.9)],
    "newton": [("physics", 0.8), ("gravity", 0.9), ("england", 0.7)],
    "germany": [("einstein", 0.7), ("berlin", 0.6)],
    "gravity": [("newton", 0.9), ("spacetime", 0.7)],
    "spacetime": [("relativity", 0.9), ("gravity", 0.7)],
    "england": [("newton", 0.7)],
    "berlin": [("germany", 0.6)],
}
|
|
# Maps an entity name to the ids of the chunks associated with it.
# Only four of the ADJACENCY nodes have chunks; the ids are keys of CHUNK_TEXTS.
ENTITY_TO_CHUNKS = {
    "einstein": ["c1", "c2"],
    "relativity": ["c2", "c3"],
    "newton": ["c4"],
    "physics": ["c1", "c3", "c4"],
}
|
|
# Maps a chunk id (as referenced from ENTITY_TO_CHUNKS) to its raw text.
CHUNK_TEXTS = {
    "c1": "Einstein was a physicist who developed the theory of relativity.",
    "c2": "The theory of relativity was published by Einstein in 1905.",
    "c3": "Relativity changed our understanding of physics and spacetime.",
    "c4": "Newton developed classical mechanics and the law of gravity.",
}
|
|
|
|
| |
|
|
def test_ppr_basic():
    """Check PPR seeded at "einstein" scores the seed and favours near nodes.

    In ADJACENCY, "relativity" is a direct neighbour of the seed while
    "berlin" is only reachable through "germany", so "relativity" is
    expected to receive the larger score.
    """
    scorer = PPRConfidenceScorer(damping=0.85, max_iterations=20)
    scores = scorer.compute_ppr(ADJACENCY, ["einstein"])
    assert "einstein" in scores
    assert scores["einstein"] > 0
    # Nodes absent from the result are treated as having score 0.
    assert scores.get("relativity", 0) > scores.get("berlin", 0)
|
|
def test_ppr_multiple_seeds():
    """Check that PPR with two seeds gives a positive score to a shared neighbour.

    "physics" is adjacent to both "einstein" and "newton" in ADJACENCY.
    """
    scorer = PPRConfidenceScorer()
    scores = scorer.compute_ppr(ADJACENCY, ["einstein", "newton"])
    assert scores.get("physics", 0) > 0
|
|
def test_ppr_empty():
    """Check that an empty graph with no seeds yields an empty score dict."""
    scorer = PPRConfidenceScorer()
    assert scorer.compute_ppr({}, []) == {}
|
|
def test_ppr_context_scoring():
    """Check that score_contexts returns a non-empty, descending-ordered result."""
    scorer = PPRConfidenceScorer()
    ppr = scorer.compute_ppr(ADJACENCY, ["einstein"])
    ranked = scorer.score_contexts(ppr, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
    assert len(ranked) > 0
    # Presumably index 2 of each entry is its score; confirm against
    # score_contexts. Only the first and last entries are compared.
    assert ranked[0][2] >= ranked[-1][2]
|
|
|
|
| |
|
|
def test_budget_basic():
    """Check that pruning two items under a 50-token budget stays within it."""
    ctrl = TokenBudgetController(max_tokens=50)
    items = [("Short text.", 0.9), ("A much longer text that takes more tokens.", 0.5)]
    # Only the stats are inspected here; the selected items are ignored.
    _, stats = ctrl.prune_context(items)
    assert stats["used_tokens"] <= 50
    assert stats["items_selected"] <= 2
|
|
def test_budget_all_fit():
    """Check that a very large budget keeps both items."""
    ctrl = TokenBudgetController(max_tokens=10000)
    items = [("Hello.", 0.9), ("World.", 0.8)]
    selected, stats = ctrl.prune_context(items)
    assert len(selected) == 2
    assert stats["reduction_pct"] >= 0
|
|
def test_budget_priority():
    """Check that the higher-scored item comes first in the selection.

    The items are passed in low-score-first order, so the assertion only
    holds if the controller orders its output by score.
    """
    ctrl = TokenBudgetController(max_tokens=20)
    items = [("Low priority text.", 0.1), ("High priority!", 0.9)]
    # Only the selection is inspected here; the stats are ignored.
    selected, _ = ctrl.prune_context(items)
    assert "High priority!" in selected[0]
|
|
def test_budget_stats():
    """Check that the stats dict exposes "budget_tokens" and "reduction_pct".

    A single 400-character item ("a " repeated 200 times) is pruned against
    a 100-token budget.
    """
    ctrl = TokenBudgetController(max_tokens=100)
    items = [("a " * 200, 0.9)]
    _, stats = ctrl.prune_context(items)
    assert "budget_tokens" in stats
    assert "reduction_pct" in stats
|
|
|
|
| |
|
|
def test_path_find():
    """Check that find_paths discovers at least one route from "A" to "D".

    The local graph stores (target, relation, weight) triples and offers two
    two-hop routes: A -> B -> D and A -> C -> D.
    """
    adj_with_rel = {
        "A": [("B", "KNOWS", 0.9), ("C", "WORKS_AT", 0.5)],
        "B": [("D", "LOCATED_IN", 0.8)],
        "C": [("D", "PART_OF", 0.7)],
    }
    pruner = PathPruner()
    paths = pruner.find_paths(adj_with_rel, "A", "D", max_depth=3)
    assert len(paths) >= 1
|
|
def test_path_scoring():
    """Check that a two-edge path is kept and scored as the product of its weights."""
    import math  # local import: the module header does not import math

    pruner = PathPruner()
    paths = [[("A", "KNOWS", "B"), ("B", "IN", "C")]]
    weights = {("A", "B"): 0.9, ("B", "C"): 0.8}
    scored = pruner.score_and_prune(paths, weights, threshold=0.1)
    assert len(scored) == 1
    # Compare with a tolerance: exact float equality would fail if the
    # implementation multiplies in a different order or rounds the score.
    assert math.isclose(scored[0][1], 0.9 * 0.8)
|
|
def test_path_serialize():
    """Check that serialized paths include entity names and a 3-decimal confidence."""
    pruner = PathPruner()
    scored = [
        (
            [
                ("Einstein", "DEVELOPED", "Relativity"),
                ("Relativity", "EXPLAINS", "Spacetime"),
            ],
            0.72,
        )
    ]
    text = pruner.serialize_paths(scored)
    assert "Einstein" in text
    assert "confidence: 0.720" in text
|
|
|
|
| |
|
|
def test_activation_basic():
    """Check that activation keeps the seed at 1.0 and fades with distance.

    "relativity" and "physics" are direct neighbours of the seed in
    ADJACENCY; "berlin" is only reachable through "germany".
    """
    sa = SpreadingActivation(decay_factor=0.7, max_steps=2)
    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
    assert acts["einstein"] == 1.0
    assert acts.get("relativity", 0) > 0
    # Nodes absent from the result are treated as having activation 0.
    assert acts.get("berlin", 0) < acts.get("physics", 0)
|
|
def test_activation_ranking():
    """Check that rank_contexts returns a non-empty, descending-ordered result."""
    sa = SpreadingActivation()
    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
    ranked = sa.rank_contexts(acts, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
    assert len(ranked) > 0
    # Presumably index 2 of each entry is its score; confirm against
    # rank_contexts. Only the first and last entries are compared.
    assert ranked[0][2] >= ranked[-1][2]
|
|
def test_activation_decay():
    """Check that the seed's activation is at least that of a distant node."""
    sa = SpreadingActivation(decay_factor=0.5, max_steps=3)
    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
    # "berlin" is two hops from the seed (einstein -> germany -> berlin).
    assert acts.get("einstein", 0) >= acts.get("berlin", 0)
|
|
|
|
| |
|
|
def test_router_entity_centric():
    """Check that a "What is ...?" question is routed as entity-centric using the graph."""
    router = PolyGRouter()
    result = router.classify_query("What is quantum physics?")
    assert result["query_type"] == "entity_centric"
    assert result["use_graph"] is True
|
|
def test_router_multi_hop():
    """Check that a two-entity question is routed as multi-hop graph traversal."""
    router = PolyGRouter()
    result = router.classify_query("Were Einstein and Newton of the same nationality?")
    assert result["query_type"] == "multi_hop"
    assert result["strategy"] == "graph_traversal"
|
|
def test_router_comparison():
    """Check that a "Compare ..." query is recognised as multi-hop or comparison."""
    router = PolyGRouter()
    result = router.classify_query("Compare the theories of Einstein and Hawking")
    # NOTE(review): the second alternative only looks for the substring
    # "comparison" in the stringified scores, so it may pass whenever such a
    # key exists regardless of its value. Confirm this is intended.
    assert "multi_hop" in result["query_type"] or "comparison" in str(result["scores"])
|
|
def test_router_summarization():
    """Check that a "Summarize ..." query selects the community_summary strategy."""
    router = PolyGRouter()
    result = router.classify_query("Summarize the main themes of quantum physics")
    assert result["strategy"] == "community_summary"
|
|
def test_router_has_fields():
    """Check that a classification result carries strategy, confidence and reasoning."""
    router = PolyGRouter()
    result = router.classify_query("test query")
    assert "strategy" in result
    assert "confidence" in result
    assert "reasoning" in result
|
|
|
|
| |
|
|
def test_updater_scope():
    """Check that a 2-hop scope from "A" covers the seed and nodes one and two hops away.

    In the local graph "B" is one hop from "A" and "D" is two hops (A -> B -> D).
    """
    updater = IncrementalGraphUpdater()
    adj = {"A": ["B", "C"], "B": ["D"], "C": ["E"]}
    affected = updater.compute_affected_scope({"A"}, adj, scope_hops=2)
    assert "A" in affected
    assert "B" in affected
    assert "D" in affected
|
|
def test_updater_plan():
    """Check the update plan for one new entity and one relation against 100 existing.

    The plan must report one new entity and savings above 90 percent
    compared with a full rebuild.
    """
    updater = IncrementalGraphUpdater()
    plan = updater.plan_update(
        new_entities=[{"name": "X"}],
        new_relations=[{"source": "X", "target": "Y"}],
        existing_entity_count=100,
    )
    assert plan["new_entities"] == 1
    assert plan["vs_full_rebuild_savings_pct"] > 90
|
|
|
|
| |
|
|
def test_novelty_engine():
    """Run the full enhanced_retrieve pipeline and check the shape of its result.

    The result must contain contexts, routing, budget stats and a
    five-entry technique chain, and must stay within the 500-token budget.
    """
    engine = NoveltyEngine(token_budget=500)
    result = engine.enhanced_retrieve(
        query="What did Einstein discover?",
        adjacency=ADJACENCY,
        seed_entities=["einstein"],
        entity_to_chunks=ENTITY_TO_CHUNKS,
        chunk_texts=CHUNK_TEXTS,
    )
    assert "contexts" in result
    assert "routing" in result
    assert "budget_stats" in result
    assert "technique_chain" in result
    assert len(result["technique_chain"]) == 5
    assert result["budget_stats"]["used_tokens"] <= 500
|
|
|
|
if __name__ == "__main__":
    # Minimal stand-alone runner so the file works via
    # `python tests/test_novelties.py` without a test framework.
    import traceback

    # Collect every module-level callable named test_*, sorted by name so the
    # run order is stable.
    tests = [
        (name, fn)
        for name, fn in sorted(globals().items())
        if name.startswith("test_") and callable(fn)
    ]
    passed = failed = 0
    for name, fn in tests:
        try:
            fn()
        except Exception as e:
            # AssertionError is an Exception subclass, so failed asserts land
            # here together with unexpected errors.
            print(f" ❌ {name}: {e}")
            traceback.print_exc()
            failed += 1
        else:
            print(f" ✅ {name}")
            passed += 1
    print(f"\n{'='*50}")
    print(f"Novelty Tests: {passed} passed, {failed} failed, {passed+failed} total")
    if failed == 0:
        print("🎉 ALL NOVELTY TESTS PASSED!")
    else:
        # Report failure through the exit status so shells and CI notice it.
        raise SystemExit(1)
|
|