"""
Unit Tests: GraphRAG Inference Hackathon
========================================
Tests for core utility functions across all layers.
Run: python -m pytest tests/ -v
"""
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# ── Layer 1: Graph Layer Tests ─────────────────────────

def test_cosine_similarity_identical():
    from graphrag.layers.graph_layer import cosine_similarity
    assert cosine_similarity([1, 0, 0], [1, 0, 0]) == 1.0


def test_cosine_similarity_orthogonal():
    from graphrag.layers.graph_layer import cosine_similarity
    assert cosine_similarity([1, 0, 0], [0, 1, 0]) == 0.0


def test_cosine_similarity_opposite():
    from graphrag.layers.graph_layer import cosine_similarity
    assert abs(cosine_similarity([1, 0], [-1, 0]) - (-1.0)) < 1e-9


def test_cosine_similarity_zero_vector():
    from graphrag.layers.graph_layer import cosine_similarity
    assert cosine_similarity([0, 0, 0], [1, 2, 3]) == 0.0


def test_cosine_similarity_mismatched_lengths():
    from graphrag.layers.graph_layer import cosine_similarity
    assert cosine_similarity([1, 2], [1, 2, 3]) == 0.0
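# The tests above pin down the edge-case contract: identical vectors score 1.0,
# orthogonal 0.0, opposite -1.0, and degenerate inputs degrade to 0.0. For
# reference, a minimal sketch consistent with those assertions (an assumption;
# the real implementation lives in graphrag.layers.graph_layer and may differ):
def _reference_cosine_similarity(a, b):
    import math
    if len(a) != len(b):  # mismatched lengths degrade to 0.0 instead of raising
        return 0.0
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(x * x for x in b))
    if norm_a == 0.0 or norm_b == 0.0:  # zero vectors degrade to 0.0, not NaN
        return 0.0
    return dot / (norm_a * norm_b)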
def test_chunk_text_basic():
    from graphrag.layers.graph_layer import chunk_text
    text = "Hello world. " * 100
    chunks = chunk_text(text, chunk_size=200, overlap=20)
    assert len(chunks) > 1
    assert all(len(c) <= 220 for c in chunks)  # allow slight overshoot for sentence boundary


def test_chunk_text_empty():
    from graphrag.layers.graph_layer import chunk_text
    assert chunk_text("") == []
    assert chunk_text(None) == []


def test_chunk_text_short():
    from graphrag.layers.graph_layer import chunk_text
    result = chunk_text("Short text.", chunk_size=1000)
    assert len(result) == 1
    assert result[0] == "Short text."


def test_chunk_text_overlap():
    from graphrag.layers.graph_layer import chunk_text
    text = "A" * 500 + " " + "B" * 500
    chunks = chunk_text(text, chunk_size=300, overlap=50)
    assert len(chunks) >= 3
def test_generate_entity_id_deterministic():
    from graphrag.layers.graph_layer import generate_entity_id
    id1 = generate_entity_id("Albert Einstein", "PERSON")
    id2 = generate_entity_id("Albert Einstein", "PERSON")
    assert id1 == id2


def test_generate_entity_id_case_insensitive():
    from graphrag.layers.graph_layer import generate_entity_id
    id1 = generate_entity_id("Albert Einstein", "PERSON")
    id2 = generate_entity_id("albert einstein", "person")
    assert id1 == id2


def test_generate_entity_id_different_types():
    from graphrag.layers.graph_layer import generate_entity_id
    id1 = generate_entity_id("Apple", "ORGANIZATION")
    id2 = generate_entity_id("Apple", "PRODUCT")
    assert id1 != id2
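# Together these imply an ID derived deterministically from the case-folded
# (name, type) pair. One plausible sketch (an assumption; the real
# generate_entity_id may use a different hash or output format):
def _reference_entity_id(name: str, entity_type: str) -> str:
    import hashlib
    key = f"{name.strip().lower()}|{entity_type.strip().lower()}"
    return hashlib.md5(key.encode("utf-8")).hexdigest()[:16]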
def test_generate_chunk_id():
    from graphrag.layers.graph_layer import generate_chunk_id
    assert generate_chunk_id("doc1", 0) == "doc1_chunk_0000"
    assert generate_chunk_id("doc1", 42) == "doc1_chunk_0042"
# ── Layer 4: Evaluation Tests ─────────────────────────

def test_normalize_answer():
    from graphrag.layers.evaluation_layer import normalize_answer
    assert normalize_answer("The Answer") == "answer"
    assert normalize_answer(" a big space ") == "big space"
    assert normalize_answer("Hello, World!") == "hello world"


def test_compute_f1_perfect():
    from graphrag.layers.evaluation_layer import compute_f1
    assert compute_f1("the cat sat", "the cat sat") == 1.0


def test_compute_f1_partial():
    from graphrag.layers.evaluation_layer import compute_f1
    score = compute_f1("the cat sat on the mat", "the cat sat")
    assert 0.5 < score < 1.0


def test_compute_f1_no_overlap():
    from graphrag.layers.evaluation_layer import compute_f1
    assert compute_f1("dogs run fast", "cats sit quietly") == 0.0


def test_compute_f1_empty():
    from graphrag.layers.evaluation_layer import compute_f1
    assert compute_f1("", "") == 1.0
    assert compute_f1("something", "") == 0.0
    assert compute_f1("", "something") == 0.0
def test_compute_exact_match():
    from graphrag.layers.evaluation_layer import compute_exact_match
    assert compute_exact_match("Yes", "yes") == 1.0
    assert compute_exact_match("The answer", "the answer") == 1.0
    assert compute_exact_match("Yes", "No") == 0.0
def test_compute_context_hit_rate():
    from graphrag.layers.evaluation_layer import compute_context_hit_rate
    contexts = ["Einstein was born in Germany.", "He developed relativity."]
    facts = ["Einstein was born in Germany.", "He won Nobel Prize."]
    rate = compute_context_hit_rate(contexts, facts)
    assert rate == 0.5


def test_compute_context_hit_rate_empty():
    from graphrag.layers.evaluation_layer import compute_context_hit_rate
    assert compute_context_hit_rate([], []) == 0.0
    assert compute_context_hit_rate(["something"], []) == 0.0
def test_compute_token_efficiency():
    from graphrag.layers.evaluation_layer import compute_token_efficiency
    assert compute_token_efficiency(100, 250) == 2.5
    assert compute_token_efficiency(100, 50) == 0.5
    assert compute_token_efficiency(0, 100) == 0.0
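# Implied contract (assumption): the second argument divided by the first
# (e.g. GraphRAG tokens over baseline tokens), with a zero denominator mapped
# to 0.0 instead of raising ZeroDivisionError. The parameter names below are
# illustrative, not taken from the real signature:
def _reference_token_efficiency(baseline_tokens: int, compared_tokens: int) -> float:
    return compared_tokens / baseline_tokens if baseline_tokens else 0.0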
# ── Universal LLM Tests ──────────────────────────────

def test_provider_registry_completeness():
    from graphrag.layers.universal_llm import PROVIDERS
    expected = {"openai", "anthropic", "gemini", "mistral", "cohere",
                "ollama", "openrouter", "groq", "xai", "together",
                "huggingface", "deepseek"}
    assert set(PROVIDERS.keys()) == expected
def test_provider_has_required_fields():
    from graphrag.layers.universal_llm import PROVIDERS
    for pid, cfg in PROVIDERS.items():
        assert "name" in cfg, f"{pid} missing name"
        assert "default_model" in cfg, f"{pid} missing default_model"
        assert "litellm_prefix" in cfg, f"{pid} missing litellm_prefix"
        assert "cost_input" in cfg, f"{pid} missing cost_input"
        assert "cost_output" in cfg, f"{pid} missing cost_output"
def test_ollama_is_free():
    from graphrag.layers.universal_llm import PROVIDERS
    ollama = PROVIDERS["ollama"]
    assert ollama["cost_input"] == 0
    assert ollama["cost_output"] == 0
    assert ollama.get("is_local") is True


def test_get_available_providers_includes_ollama():
    from graphrag.layers.universal_llm import get_available_providers
    available = get_available_providers()
    assert "ollama" in available  # always included as local
# ── Evaluation Layer Aggregate Tests ──────────────────

def test_evaluation_layer_aggregate():
    from graphrag.layers.evaluation_layer import EvaluationLayer, EvalSample
    evl = EvaluationLayer()
    sample = EvalSample(
        query="test?", reference_answer="yes",
        baseline_answer="yes", graphrag_answer="yes indeed",
        question_type="factoid", difficulty="easy",
    )
    evl.evaluate_sample(sample, baseline_tokens=100, graphrag_tokens=200,
                        baseline_cost=0.001, graphrag_cost=0.002)
    agg = evl.compute_aggregate_metrics()
    assert agg["num_samples"] == 1
    assert agg["baseline"]["avg_f1"] > 0
    assert agg["graphrag"]["avg_f1"] > 0
def test_evaluation_layer_report():
    from graphrag.layers.evaluation_layer import EvaluationLayer, EvalSample
    evl = EvaluationLayer()
    for i in range(3):
        sample = EvalSample(query=f"q{i}?", reference_answer="answer",
                            baseline_answer="answer", graphrag_answer="answer",
                            question_type="bridge" if i % 2 == 0 else "comparison")
        evl.evaluate_sample(sample, baseline_tokens=100 + i * 10, graphrag_tokens=200 + i * 20)
    report = evl.generate_report()
    assert "BENCHMARK REPORT" in report
    assert "bridge" in report or "comparison" in report
if __name__ == "__main__":
    # Run all tests
    import traceback
    tests = [v for k, v in sorted(globals().items()) if k.startswith("test_")]
    passed = failed = 0
    for test_fn in tests:
        try:
            test_fn()
            print(f" ✅ {test_fn.__name__}")
            passed += 1
        except Exception as e:
            print(f" ❌ {test_fn.__name__}: {e}")
            traceback.print_exc()
            failed += 1
    print(f"\n{'=' * 50}")
    print(f"Results: {passed} passed, {failed} failed, {passed + failed} total")
    if failed == 0:
        print("🎉 ALL TESTS PASSED!")
    else:
        print(f"⚠️ {failed} tests failed")