# graphrag-inference-hackathon / tests / test_novelties.py
# Origin: muthuk1 — "Add unit tests for all 6 novelty techniques" (commit e51378c, verified)
"""
Tests for GraphRAG Novelties Engine
Run: python tests/test_novelties.py
"""
import sys, os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from graphrag.layers.novelties import (
PPRConfidenceScorer, TokenBudgetController, PathPruner,
SpreadingActivation, PolyGRouter, IncrementalGraphUpdater,
NoveltyEngine,
)
# ── Sample graph for testing ──────────────────────────
# Weighted adjacency list: node -> [(neighbor, edge_weight)]. Edges are listed
# in both directions, so the graph is effectively undirected. Two clusters
# (Einstein/relativity and Newton/gravity) are bridged by "physics" and
# "spacetime"; "berlin" and "england" are leaf nodes far from the usual seeds.
ADJACENCY = {
"einstein": [("physics", 0.9), ("germany", 0.7), ("relativity", 0.95)],
"physics": [("einstein", 0.9), ("newton", 0.8), ("relativity", 0.85)],
"relativity": [("einstein", 0.95), ("physics", 0.85), ("spacetime", 0.9)],
"newton": [("physics", 0.8), ("gravity", 0.9), ("england", 0.7)],
"germany": [("einstein", 0.7), ("berlin", 0.6)],
"gravity": [("newton", 0.9), ("spacetime", 0.7)],
"spacetime": [("relativity", 0.9), ("gravity", 0.7)],
"england": [("newton", 0.7)],
"berlin": [("germany", 0.6)],
}
# Inverted index: entity name -> ids of the text chunks that mention it.
# Only a subset of graph nodes has chunks, mirroring a sparse real corpus.
ENTITY_TO_CHUNKS = {
"einstein": ["c1", "c2"],
"relativity": ["c2", "c3"],
"newton": ["c4"],
"physics": ["c1", "c3", "c4"],
}
# Raw text for each chunk id referenced in ENTITY_TO_CHUNKS.
CHUNK_TEXTS = {
"c1": "Einstein was a physicist who developed the theory of relativity.",
"c2": "The theory of relativity was published by Einstein in 1905.",
"c3": "Relativity changed our understanding of physics and spacetime.",
"c4": "Newton developed classical mechanics and the law of gravity.",
}
# ── PPR Tests ─────────────────────────────────────────
def test_ppr_basic():
    """PPR from one seed scores the seed positively and ranks near nodes above far ones."""
    engine = PPRConfidenceScorer(damping=0.85, max_iterations=20)
    result = engine.compute_ppr(ADJACENCY, ["einstein"])
    assert "einstein" in result
    assert result["einstein"] > 0
    # "relativity" is a direct neighbor of the seed; "berlin" is two hops out
    assert result.get("relativity", 0) > result.get("berlin", 0)
def test_ppr_multiple_seeds():
    """A node adjacent to both seeds receives positive probability mass."""
    result = PPRConfidenceScorer().compute_ppr(ADJACENCY, ["einstein", "newton"])
    assert result.get("physics", 0) > 0  # neighbor of both seeds
def test_ppr_empty():
    """No graph and no seeds yields an empty score map."""
    assert PPRConfidenceScorer().compute_ppr({}, []) == {}
def test_ppr_context_scoring():
    """Chunk ranking derived from PPR scores is non-empty and sorted high-to-low."""
    scorer = PPRConfidenceScorer()
    ppr_scores = scorer.compute_ppr(ADJACENCY, ["einstein"])
    ranked = scorer.score_contexts(ppr_scores, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
    assert ranked
    # third tuple field is the score; list must be in descending order
    assert ranked[0][2] >= ranked[-1][2]
# ── Token Budget Tests ────────────────────────────────
def test_budget_basic():
    """Selection never exceeds the token budget or the number of candidates."""
    controller = TokenBudgetController(max_tokens=50)
    candidates = [
        ("Short text.", 0.9),
        ("A much longer text that takes more tokens.", 0.5),
    ]
    _, stats = controller.prune_context(candidates)
    assert stats["used_tokens"] <= 50
    assert stats["items_selected"] <= 2
def test_budget_all_fit():
    """With a huge budget every item survives and reduction stays non-negative."""
    controller = TokenBudgetController(max_tokens=10000)
    chosen, stats = controller.prune_context([("Hello.", 0.9), ("World.", 0.8)])
    assert len(chosen) == 2
    assert stats["reduction_pct"] >= 0
def test_budget_priority():
    """Under a tight budget, the highest-scored item must be selected first."""
    ctrl = TokenBudgetController(max_tokens=20)
    items = [("Low priority text.", 0.1), ("High priority!", 0.9)]
    selected, stats = ctrl.prune_context(items)
    # Guard first: if nothing fits the 20-token budget, fail with a clear
    # assertion message instead of an IndexError on selected[0].
    assert selected, "budget controller selected no items"
    assert "High priority!" in selected[0]  # highest score first
def test_budget_stats():
    """The stats dict always reports budget and reduction figures."""
    controller = TokenBudgetController(max_tokens=100)
    oversized = [("a " * 200, 0.9)]  # ~400 chars, roughly 100 tokens
    _, stats = controller.prune_context(oversized)
    assert "budget_tokens" in stats
    assert "reduction_pct" in stats
# ── Path Pruner Tests ─────────────────────────────────
def test_path_find():
    """At least one A→D path exists within three hops of a small relational graph."""
    graph = {
        "A": [("B", "KNOWS", 0.9), ("C", "WORKS_AT", 0.5)],
        "B": [("D", "LOCATED_IN", 0.8)],
        "C": [("D", "PART_OF", 0.7)],
    }
    found = PathPruner().find_paths(graph, "A", "D", max_depth=3)
    assert len(found) >= 1
def test_path_scoring():
    """A path's confidence is the product of its edge weights."""
    import math  # local: keeps the module's top-level imports unchanged

    pruner = PathPruner()
    paths = [[("A", "KNOWS", "B"), ("B", "IN", "C")]]
    weights = {("A", "B"): 0.9, ("B", "C"): 0.8}
    scored = pruner.score_and_prune(paths, weights, threshold=0.1)
    assert len(scored) == 1
    # Compare with a tolerance: exact float equality against 0.9 * 0.8 is
    # brittle if the implementation multiplies in a different order or
    # introduces any intermediate rounding.
    assert math.isclose(scored[0][1], 0.72, rel_tol=1e-9)
def test_path_serialize():
    """Serialized paths mention the entities and the 3-decimal confidence."""
    path = [("Einstein", "DEVELOPED", "Relativity"), ("Relativity", "EXPLAINS", "Spacetime")]
    text = PathPruner().serialize_paths([(path, 0.72)])
    assert "Einstein" in text
    assert "confidence: 0.720" in text
# ── Spreading Activation Tests ────────────────────────
def test_activation_basic():
    """The seed keeps full activation; energy falls off with graph distance."""
    engine = SpreadingActivation(decay_factor=0.7, max_steps=2)
    levels = engine.activate(ADJACENCY, {"einstein": 1.0})
    assert levels["einstein"] == 1.0
    assert levels.get("relativity", 0) > 0  # one hop from the seed
    assert levels.get("berlin", 0) < levels.get("physics", 0)  # two hops vs one
def test_activation_ranking():
    """Context ranking built from activations is non-empty and descending."""
    engine = SpreadingActivation()
    levels = engine.activate(ADJACENCY, {"einstein": 1.0})
    ranked = engine.rank_contexts(levels, ENTITY_TO_CHUNKS, CHUNK_TEXTS)
    assert ranked
    assert ranked[0][2] >= ranked[-1][2]
def test_activation_decay():
    """A distant node never out-activates the seed itself."""
    engine = SpreadingActivation(decay_factor=0.5, max_steps=3)
    levels = engine.activate(ADJACENCY, {"einstein": 1.0})
    assert levels.get("einstein", 0) >= levels.get("berlin", 0)
# ── PolyG Router Tests ────────────────────────────────
def test_router_entity_centric():
    """A single-entity factual question is classified as entity-centric with graph use."""
    outcome = PolyGRouter().classify_query("What is quantum physics?")
    assert outcome["query_type"] == "entity_centric"
    assert outcome["use_graph"] is True
def test_router_multi_hop():
    """A two-entity relational question maps to multi-hop graph traversal."""
    outcome = PolyGRouter().classify_query(
        "Were Einstein and Newton of the same nationality?"
    )
    assert outcome["query_type"] == "multi_hop"
    assert outcome["strategy"] == "graph_traversal"
def test_router_comparison():
    """A comparison query either classifies as multi-hop or scores as a comparison."""
    outcome = PolyGRouter().classify_query("Compare the theories of Einstein and Hawking")
    assert "multi_hop" in outcome["query_type"] or "comparison" in str(outcome["scores"])
def test_router_summarization():
    """A summarization request maps to the community-summary strategy."""
    outcome = PolyGRouter().classify_query("Summarize the main themes of quantum physics")
    assert outcome["strategy"] == "community_summary"
def test_router_has_fields():
    """Every classification result carries strategy, confidence, and reasoning."""
    outcome = PolyGRouter().classify_query("test query")
    for field in ("strategy", "confidence", "reasoning"):
        assert field in outcome
# ── Incremental Updater Tests ─────────────────────────
def test_updater_scope():
    """A 2-hop scope from A covers A, its children, and its grandchildren."""
    graph = {"A": ["B", "C"], "B": ["D"], "C": ["E"]}
    affected = IncrementalGraphUpdater().compute_affected_scope({"A"}, graph, scope_hops=2)
    for node in ("A", "B", "D"):  # D sits exactly two hops from A
        assert node in affected
def test_updater_plan():
    """An incremental plan for one new entity reports large savings vs a full rebuild."""
    plan = IncrementalGraphUpdater().plan_update(
        new_entities=[{"name": "X"}],
        new_relations=[{"source": "X", "target": "Y"}],
        existing_entity_count=100,
    )
    assert plan["new_entities"] == 1
    assert plan["vs_full_rebuild_savings_pct"] > 90
# ── NoveltyEngine Integration Test ───────────────────
def test_novelty_engine():
    """End-to-end retrieval yields every result section and respects the token budget."""
    engine = NoveltyEngine(token_budget=500)
    result = engine.enhanced_retrieve(
        query="What did Einstein discover?",
        adjacency=ADJACENCY,
        seed_entities=["einstein"],
        entity_to_chunks=ENTITY_TO_CHUNKS,
        chunk_texts=CHUNK_TEXTS,
    )
    for section in ("contexts", "routing", "budget_stats", "technique_chain"):
        assert section in result
    assert len(result["technique_chain"]) == 5  # all five retrieval-time techniques ran
    assert result["budget_stats"]["used_tokens"] <= 500
if __name__ == "__main__":
    # Standalone runner: discover and execute every test_* function in this
    # module, printing a per-test verdict and a final tally.
    import traceback
    passed = failed = 0
    discovered = sorted(
        (name, obj)
        for name, obj in globals().items()
        if name.startswith("test_") and callable(obj)
    )
    for name, fn in discovered:
        try:
            fn()
            print(f" βœ… {name}")
            passed += 1
        except Exception as e:
            print(f" ❌ {name}: {e}")
            traceback.print_exc()
            failed += 1
    print(f"\n{'='*50}")
    print(f"Novelty Tests: {passed} passed, {failed} failed, {passed+failed} total")
    if failed == 0:
        print("πŸŽ‰ ALL NOVELTY TESTS PASSED!")