File size: 8,813 Bytes
e51378c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 | """
Tests for GraphRAG Novelties Engine
Run: python tests/test_novelties.py
"""
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from graphrag.layers.novelties import (
PPRConfidenceScorer, TokenBudgetController, PathPruner,
SpreadingActivation, PolyGRouter, IncrementalGraphUpdater,
NoveltyEngine,
)
# ββ Sample graph for testing ββββββββββββββββββββββββββ
ADJACENCY = {
"einstein": [("physics", 0.9), ("germany", 0.7), ("relativity", 0.95)],
"physics": [("einstein", 0.9), ("newton", 0.8), ("relativity", 0.85)],
"relativity": [("einstein", 0.95), ("physics", 0.85), ("spacetime", 0.9)],
"newton": [("physics", 0.8), ("gravity", 0.9), ("england", 0.7)],
"germany": [("einstein", 0.7), ("berlin", 0.6)],
"gravity": [("newton", 0.9), ("spacetime", 0.7)],
"spacetime": [("relativity", 0.9), ("gravity", 0.7)],
"england": [("newton", 0.7)],
"berlin": [("germany", 0.6)],
}
ENTITY_TO_CHUNKS = {
"einstein": ["c1", "c2"],
"relativity": ["c2", "c3"],
"newton": ["c4"],
"physics": ["c1", "c3", "c4"],
}
CHUNK_TEXTS = {
"c1": "Einstein was a physicist who developed the theory of relativity.",
"c2": "The theory of relativity was published by Einstein in 1905.",
"c3": "Relativity changed our understanding of physics and spacetime.",
"c4": "Newton developed classical mechanics and the law of gravity.",
}
# ββ PPR Tests βββββββββββββββββββββββββββββββββββββββββ
def test_ppr_basic():
scorer = PPRConfidenceScorer(damping=0.85, max_iterations=20)
scores = scorer.compute_ppr(ADJACENCY, ["einstein"])
assert "einstein" in scores
assert scores["einstein"] > 0
assert scores.get("relativity", 0) > scores.get("berlin", 0) # closer = higher
def test_ppr_multiple_seeds():
scorer = PPRConfidenceScorer()
scores = scorer.compute_ppr(ADJACENCY, ["einstein", "newton"])
assert scores.get("physics", 0) > 0 # connected to both seeds
def test_ppr_empty():
scorer = PPRConfidenceScorer()
assert scorer.compute_ppr({}, []) == {}
def test_ppr_context_scoring():
scorer = PPRConfidenceScorer()
ppr = scorer.compute_ppr(ADJACENCY, ["einstein"])
ranked = scorer.score_contexts(ppr, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
assert len(ranked) > 0
assert ranked[0][2] >= ranked[-1][2] # sorted descending
# ββ Token Budget Tests ββββββββββββββββββββββββββββββββ
def test_budget_basic():
ctrl = TokenBudgetController(max_tokens=50)
items = [("Short text.", 0.9), ("A much longer text that takes more tokens.", 0.5)]
selected, stats = ctrl.prune_context(items)
assert stats["used_tokens"] <= 50
assert stats["items_selected"] <= 2
def test_budget_all_fit():
ctrl = TokenBudgetController(max_tokens=10000)
items = [("Hello.", 0.9), ("World.", 0.8)]
selected, stats = ctrl.prune_context(items)
assert len(selected) == 2
assert stats["reduction_pct"] >= 0
def test_budget_priority():
ctrl = TokenBudgetController(max_tokens=20)
items = [("Low priority text.", 0.1), ("High priority!", 0.9)]
selected, stats = ctrl.prune_context(items)
assert "High priority!" in selected[0] # highest score first
def test_budget_stats():
ctrl = TokenBudgetController(max_tokens=100)
items = [("a " * 200, 0.9)] # 400 chars β 100 tokens
_, stats = ctrl.prune_context(items)
assert "budget_tokens" in stats
assert "reduction_pct" in stats
# ββ Path Pruner Tests βββββββββββββββββββββββββββββββββ
def test_path_find():
adj_with_rel = {
"A": [("B", "KNOWS", 0.9), ("C", "WORKS_AT", 0.5)],
"B": [("D", "LOCATED_IN", 0.8)],
"C": [("D", "PART_OF", 0.7)],
}
pruner = PathPruner()
paths = pruner.find_paths(adj_with_rel, "A", "D", max_depth=3)
assert len(paths) >= 1
def test_path_scoring():
pruner = PathPruner()
paths = [[("A", "KNOWS", "B"), ("B", "IN", "C")]]
weights = {("A", "B"): 0.9, ("B", "C"): 0.8}
scored = pruner.score_and_prune(paths, weights, threshold=0.1)
assert len(scored) == 1
assert scored[0][1] == 0.9 * 0.8 # product of edge weights
def test_path_serialize():
pruner = PathPruner()
scored = [([("Einstein", "DEVELOPED", "Relativity"), ("Relativity", "EXPLAINS", "Spacetime")], 0.72)]
text = pruner.serialize_paths(scored)
assert "Einstein" in text
assert "confidence: 0.720" in text
# ββ Spreading Activation Tests ββββββββββββββββββββββββ
def test_activation_basic():
sa = SpreadingActivation(decay_factor=0.7, max_steps=2)
acts = sa.activate(ADJACENCY, {"einstein": 1.0})
assert acts["einstein"] == 1.0
assert acts.get("relativity", 0) > 0 # directly connected
assert acts.get("berlin", 0) < acts.get("physics", 0) # further away
def test_activation_ranking():
sa = SpreadingActivation()
acts = sa.activate(ADJACENCY, {"einstein": 1.0})
ranked = sa.rank_contexts(acts, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
assert len(ranked) > 0
assert ranked[0][2] >= ranked[-1][2]
def test_activation_decay():
sa = SpreadingActivation(decay_factor=0.5, max_steps=3)
acts = sa.activate(ADJACENCY, {"einstein": 1.0})
# Further nodes should have lower activation
assert acts.get("einstein", 0) >= acts.get("berlin", 0)
# ββ PolyG Router Tests ββββββββββββββββββββββββββββββββ
def test_router_entity_centric():
router = PolyGRouter()
result = router.classify_query("What is quantum physics?")
assert result["query_type"] == "entity_centric"
assert result["use_graph"] is True
def test_router_multi_hop():
router = PolyGRouter()
result = router.classify_query("Were Einstein and Newton of the same nationality?")
assert result["query_type"] == "multi_hop"
assert result["strategy"] == "graph_traversal"
def test_router_comparison():
router = PolyGRouter()
result = router.classify_query("Compare the theories of Einstein and Hawking")
assert "multi_hop" in result["query_type"] or "comparison" in str(result["scores"])
def test_router_summarization():
router = PolyGRouter()
result = router.classify_query("Summarize the main themes of quantum physics")
assert result["strategy"] == "community_summary"
def test_router_has_fields():
router = PolyGRouter()
result = router.classify_query("test query")
assert "strategy" in result
assert "confidence" in result
assert "reasoning" in result
# ββ Incremental Updater Tests βββββββββββββββββββββββββ
def test_updater_scope():
updater = IncrementalGraphUpdater()
adj = {"A": ["B", "C"], "B": ["D"], "C": ["E"]}
affected = updater.compute_affected_scope({"A"}, adj, scope_hops=2)
assert "A" in affected
assert "B" in affected
assert "D" in affected # 2 hops from A
def test_updater_plan():
updater = IncrementalGraphUpdater()
plan = updater.plan_update(
new_entities=[{"name": "X"}],
new_relations=[{"source": "X", "target": "Y"}],
existing_entity_count=100,
)
assert plan["new_entities"] == 1
assert plan["vs_full_rebuild_savings_pct"] > 90
# ββ NoveltyEngine Integration Test βββββββββββββββββββ
def test_novelty_engine():
engine = NoveltyEngine(token_budget=500)
result = engine.enhanced_retrieve(
query="What did Einstein discover?",
adjacency=ADJACENCY,
seed_entities=["einstein"],
entity_to_chunks=ENTITY_TO_CHUNKS,
chunk_texts=CHUNK_TEXTS,
)
assert "contexts" in result
assert "routing" in result
assert "budget_stats" in result
assert "technique_chain" in result
assert len(result["technique_chain"]) == 5
assert result["budget_stats"]["used_tokens"] <= 500
if __name__ == "__main__":
import traceback
tests = [(k, v) for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)]
passed = failed = 0
for name, fn in tests:
try:
fn()
print(f" β
{name}")
passed += 1
except Exception as e:
print(f" β {name}: {e}")
traceback.print_exc()
failed += 1
print(f"\n{'='*50}")
print(f"Novelty Tests: {passed} passed, {failed} failed, {passed+failed} total")
if failed == 0:
print("π ALL NOVELTY TESTS PASSED!")
|