muthuk1 committed on
Commit
e51378c
·
verified ·
1 Parent(s): fe766f7

Add unit tests for all 6 novelty techniques

Browse files
Files changed (1) hide show
  1. tests/test_novelties.py +238 -0
tests/test_novelties.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tests for GraphRAG Novelties Engine
3
+ Run: python tests/test_novelties.py
4
+ """
5
+ import sys, os
6
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+
8
+ from graphrag.layers.novelties import (
9
+ PPRConfidenceScorer, TokenBudgetController, PathPruner,
10
+ SpreadingActivation, PolyGRouter, IncrementalGraphUpdater,
11
+ NoveltyEngine,
12
+ )
13
+
14
+ # ── Sample graph for testing ──────────────────────────
15
+
16
# Shared fixture: a small weighted knowledge graph. Each entity maps to a list
# of (neighbour, edge_weight) pairs; every edge is listed from both endpoints
# with the same weight.
ADJACENCY = {
    "einstein": [("physics", 0.9), ("germany", 0.7), ("relativity", 0.95)],
    "physics": [("einstein", 0.9), ("newton", 0.8), ("relativity", 0.85)],
    "relativity": [("einstein", 0.95), ("physics", 0.85), ("spacetime", 0.9)],
    "newton": [("physics", 0.8), ("gravity", 0.9), ("england", 0.7)],
    "germany": [("einstein", 0.7), ("berlin", 0.6)],
    "gravity": [("newton", 0.9), ("spacetime", 0.7)],
    "spacetime": [("relativity", 0.9), ("gravity", 0.7)],
    "england": [("newton", 0.7)],
    "berlin": [("germany", 0.6)],
}
27
+
28
# Shared fixture: entity name -> ids of the text chunks that mention it.
# Only a subset of the ADJACENCY entities have chunks; ids refer to CHUNK_TEXTS.
ENTITY_TO_CHUNKS = {
    "einstein": ["c1", "c2"],
    "relativity": ["c2", "c3"],
    "newton": ["c4"],
    "physics": ["c1", "c3", "c4"],
}
34
+
35
# Shared fixture: chunk id -> chunk text, for the ids used in ENTITY_TO_CHUNKS.
CHUNK_TEXTS = {
    "c1": "Einstein was a physicist who developed the theory of relativity.",
    "c2": "The theory of relativity was published by Einstein in 1905.",
    "c3": "Relativity changed our understanding of physics and spacetime.",
    "c4": "Newton developed classical mechanics and the law of gravity.",
}
41
+
42
+
43
+ # ── PPR Tests ─────────────────────────────────────────
44
+
45
def test_ppr_basic():
    """PPR seeded at one entity scores the seed and favours nearby nodes.

    With "einstein" as the only seed, the seed itself must get a positive
    score, and its direct neighbour "relativity" must outrank "berlin",
    which is only reachable through "germany".
    """
    scorer = PPRConfidenceScorer(damping=0.85, max_iterations=20)
    scores = scorer.compute_ppr(ADJACENCY, ["einstein"])
    assert "einstein" in scores
    assert scores["einstein"] > 0
    # Missing entities are treated as score 0 so the comparison never raises.
    assert scores.get("relativity", 0) > scores.get("berlin", 0)  # closer = higher
51
+
52
def test_ppr_multiple_seeds():
    """With two seeds, a node adjacent to both receives a positive score."""
    scorer = PPRConfidenceScorer()
    scores = scorer.compute_ppr(ADJACENCY, ["einstein", "newton"])
    # "physics" is a direct neighbour of both "einstein" and "newton".
    assert scores.get("physics", 0) > 0  # connected to both seeds
56
+
57
def test_ppr_empty():
    """An empty graph with no seeds yields an empty score mapping."""
    scorer = PPRConfidenceScorer()
    assert scorer.compute_ppr({}, []) == {}
60
+
61
def test_ppr_context_scoring():
    """Contexts ranked from PPR scores are non-empty and ordered best-first."""
    scorer = PPRConfidenceScorer()
    ppr = scorer.compute_ppr(ADJACENCY, ["einstein"])
    ranked = scorer.score_contexts(ppr, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
    assert len(ranked) > 0
    # Index 2 of each ranked entry is presumably its score (confirm against
    # score_contexts); the first entry must not score below the last.
    assert ranked[0][2] >= ranked[-1][2]  # sorted descending
67
+
68
+
69
+ # ── Token Budget Tests ────────────────────────────────
70
+
71
def test_budget_basic():
    """Pruning never reports more tokens than the budget or more items than given."""
    ctrl = TokenBudgetController(max_tokens=50)
    # Each item is a (text, score) pair.
    items = [("Short text.", 0.9), ("A much longer text that takes more tokens.", 0.5)]
    selected, stats = ctrl.prune_context(items)
    assert stats["used_tokens"] <= 50
    assert stats["items_selected"] <= 2
77
+
78
def test_budget_all_fit():
    """With a very large budget, every item is kept and reduction is non-negative."""
    ctrl = TokenBudgetController(max_tokens=10000)
    items = [("Hello.", 0.9), ("World.", 0.8)]
    selected, stats = ctrl.prune_context(items)
    assert len(selected) == 2
    assert stats["reduction_pct"] >= 0
84
+
85
def test_budget_priority():
    """The highest-scored item comes first in the selection, regardless of input order."""
    ctrl = TokenBudgetController(max_tokens=20)
    # The high-score item is deliberately supplied second.
    items = [("Low priority text.", 0.1), ("High priority!", 0.9)]
    selected, stats = ctrl.prune_context(items)
    assert "High priority!" in selected[0]  # highest score first
90
+
91
def test_budget_stats():
    """The stats returned by prune_context expose the budget and reduction keys."""
    ctrl = TokenBudgetController(max_tokens=100)
    items = [("a " * 200, 0.9)]  # 400 chars ≈ 100 tokens
    _, stats = ctrl.prune_context(items)
    assert "budget_tokens" in stats
    assert "reduction_pct" in stats
97
+
98
+
99
+ # ── Path Pruner Tests ─────────────────────────────────
100
+
101
def test_path_find():
    """At least one path from A to D is found within three hops."""
    # Unlike ADJACENCY, edges here are (target, relation, weight) triples.
    # Two routes lead from A to D: A -> B -> D and A -> C -> D.
    adj_with_rel = {
        "A": [("B", "KNOWS", 0.9), ("C", "WORKS_AT", 0.5)],
        "B": [("D", "LOCATED_IN", 0.8)],
        "C": [("D", "PART_OF", 0.7)],
    }
    pruner = PathPruner()
    paths = pruner.find_paths(adj_with_rel, "A", "D", max_depth=3)
    assert len(paths) >= 1
110
+
111
def test_path_scoring():
    """A two-edge path above the threshold is kept and scored as the product of its weights."""
    import math

    pruner = PathPruner()
    # A path is a list of (source, relation, target) triples; weights are
    # keyed by (source, target).
    paths = [[("A", "KNOWS", "B"), ("B", "IN", "C")]]
    weights = {("A", "B"): 0.9, ("B", "C"): 0.8}
    scored = pruner.score_and_prune(paths, weights, threshold=0.1)
    assert len(scored) == 1
    # Compare with a tolerance rather than `==`: the float product depends on
    # multiplication order and any rounding inside score_and_prune, so exact
    # equality would make the test brittle.
    assert math.isclose(scored[0][1], 0.9 * 0.8, rel_tol=1e-9)  # product of edge weights
118
+
119
def test_path_serialize():
    """Serialized paths mention the entities and show the score to three decimals."""
    pruner = PathPruner()
    # One (path, score) pair, where the path is a list of
    # (source, relation, target) triples.
    scored = [([("Einstein", "DEVELOPED", "Relativity"), ("Relativity", "EXPLAINS", "Spacetime")], 0.72)]
    text = pruner.serialize_paths(scored)
    assert "Einstein" in text
    assert "confidence: 0.720" in text
125
+
126
+
127
+ # ── Spreading Activation Tests ────────────────────────
128
+
129
def test_activation_basic():
    """Activation stays at 1.0 on the seed and weakens with distance."""
    sa = SpreadingActivation(decay_factor=0.7, max_steps=2)
    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
    assert acts["einstein"] == 1.0
    assert acts.get("relativity", 0) > 0  # directly connected
    # "physics" is a direct neighbour of the seed; "berlin" is only reachable
    # through "germany", so it must end up with less activation.
    assert acts.get("berlin", 0) < acts.get("physics", 0)  # further away
135
+
136
def test_activation_ranking():
    """Contexts ranked from activation values are non-empty and ordered best-first."""
    sa = SpreadingActivation()
    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
    ranked = sa.rank_contexts(acts, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
    assert len(ranked) > 0
    # Index 2 of each ranked entry is presumably its score (confirm against
    # rank_contexts); the first entry must not score below the last.
    assert ranked[0][2] >= ranked[-1][2]
142
+
143
def test_activation_decay():
    """With decay applied, the seed is at least as active as a distant node."""
    sa = SpreadingActivation(decay_factor=0.5, max_steps=3)
    acts = sa.activate(ADJACENCY, {"einstein": 1.0})
    # Further nodes should have lower activation
    assert acts.get("einstein", 0) >= acts.get("berlin", 0)
148
+
149
+
150
+ # ── PolyG Router Tests ────────────────────────────────
151
+
152
def test_router_entity_centric():
    """A "What is X?" query is classified as entity-centric and uses the graph."""
    router = PolyGRouter()
    result = router.classify_query("What is quantum physics?")
    assert result["query_type"] == "entity_centric"
    assert result["use_graph"] is True
157
+
158
def test_router_multi_hop():
    """A query relating two entities is classified as multi-hop with graph traversal."""
    router = PolyGRouter()
    result = router.classify_query("Were Einstein and Newton of the same nationality?")
    assert result["query_type"] == "multi_hop"
    assert result["strategy"] == "graph_traversal"
163
+
164
def test_router_comparison():
    """A "Compare X and Y" query is routed as multi-hop or scored as a comparison."""
    router = PolyGRouter()
    result = router.classify_query("Compare the theories of Einstein and Hawking")
    # NOTE(review): the second clause only checks that the text "comparison"
    # appears somewhere in the stringified scores. If `scores` always carries
    # a "comparison" key, this assertion can never fail. Confirm the shape of
    # `scores` and tighten if so.
    assert "multi_hop" in result["query_type"] or "comparison" in str(result["scores"])
168
+
169
def test_router_summarization():
    """A "Summarize ..." query selects the community-summary strategy."""
    router = PolyGRouter()
    result = router.classify_query("Summarize the main themes of quantum physics")
    assert result["strategy"] == "community_summary"
173
+
174
def test_router_has_fields():
    """Any query returns a result with strategy, confidence and reasoning keys."""
    router = PolyGRouter()
    result = router.classify_query("test query")
    assert "strategy" in result
    assert "confidence" in result
    assert "reasoning" in result
180
+
181
+
182
+ # ── Incremental Updater Tests ─────────────────────────
183
+
184
def test_updater_scope():
    """The affected scope of a changed entity includes nodes up to two hops away."""
    updater = IncrementalGraphUpdater()
    # Unweighted adjacency here: each entity maps to a plain list of neighbours.
    adj = {"A": ["B", "C"], "B": ["D"], "C": ["E"]}
    affected = updater.compute_affected_scope({"A"}, adj, scope_hops=2)
    assert "A" in affected
    assert "B" in affected
    assert "D" in affected  # 2 hops from A
191
+
192
def test_updater_plan():
    """Adding one entity to a 100-entity graph plans >90% savings over a full rebuild."""
    updater = IncrementalGraphUpdater()
    plan = updater.plan_update(
        new_entities=[{"name": "X"}],
        new_relations=[{"source": "X", "target": "Y"}],
        existing_entity_count=100,
    )
    assert plan["new_entities"] == 1
    assert plan["vs_full_rebuild_savings_pct"] > 90
201
+
202
+
203
+ # ── NoveltyEngine Integration Test ───────────────────
204
+
205
def test_novelty_engine():
    """End-to-end retrieval returns all result sections and respects the token budget."""
    engine = NoveltyEngine(token_budget=500)
    result = engine.enhanced_retrieve(
        query="What did Einstein discover?",
        adjacency=ADJACENCY,
        seed_entities=["einstein"],
        entity_to_chunks=ENTITY_TO_CHUNKS,
        chunk_texts=CHUNK_TEXTS,
    )
    assert "contexts" in result
    assert "routing" in result
    assert "budget_stats" in result
    assert "technique_chain" in result
    # The engine is expected to report exactly five chained techniques.
    assert len(result["technique_chain"]) == 5
    # Must agree with the token_budget passed to NoveltyEngine above.
    assert result["budget_stats"]["used_tokens"] <= 500
220
+
221
+
222
if __name__ == "__main__":
    # Minimal stand-alone runner so the suite can be executed without pytest:
    # it collects every module-level callable named test_*, runs them in
    # alphabetical order, and prints a per-test and overall summary.
    import traceback

    tests = [(k, v) for k, v in sorted(globals().items()) if k.startswith("test_") and callable(v)]
    passed = failed = 0
    for name, fn in tests:
        try:
            fn()
        except Exception as e:  # AssertionError included; keep running the rest
            print(f" ❌ {name}: {e}")
            traceback.print_exc()
            failed += 1
        else:
            print(f" ✅ {name}")
            passed += 1
    print(f"\n{'='*50}")
    print(f"Novelty Tests: {passed} passed, {failed} failed, {passed+failed} total")
    if failed == 0:
        print("🎉 ALL NOVELTY TESTS PASSED!")
    else:
        # Exit non-zero so shells and CI detect the failure; previously the
        # script exited 0 even when tests failed.
        sys.exit(1)