""" Unit Tests — GraphRAG Inference Hackathon ========================================== Tests for core utility functions across all layers. Run: python -m pytest tests/ -v """ import sys import os sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) # ── Layer 1: Graph Layer Tests ───────────────────────── def test_cosine_similarity_identical(): from graphrag.layers.graph_layer import cosine_similarity assert cosine_similarity([1, 0, 0], [1, 0, 0]) == 1.0 def test_cosine_similarity_orthogonal(): from graphrag.layers.graph_layer import cosine_similarity assert cosine_similarity([1, 0, 0], [0, 1, 0]) == 0.0 def test_cosine_similarity_opposite(): from graphrag.layers.graph_layer import cosine_similarity assert abs(cosine_similarity([1, 0], [-1, 0]) - (-1.0)) < 1e-9 def test_cosine_similarity_zero_vector(): from graphrag.layers.graph_layer import cosine_similarity assert cosine_similarity([0, 0, 0], [1, 2, 3]) == 0.0 def test_cosine_similarity_mismatched_lengths(): from graphrag.layers.graph_layer import cosine_similarity assert cosine_similarity([1, 2], [1, 2, 3]) == 0.0 def test_chunk_text_basic(): from graphrag.layers.graph_layer import chunk_text text = "Hello world. " * 100 chunks = chunk_text(text, chunk_size=200, overlap=20) assert len(chunks) > 1 assert all(len(c) <= 220 for c in chunks) # allow slight overshoot for sentence boundary def test_chunk_text_empty(): from graphrag.layers.graph_layer import chunk_text assert chunk_text("") == [] assert chunk_text(None) == [] def test_chunk_text_short(): from graphrag.layers.graph_layer import chunk_text result = chunk_text("Short text.", chunk_size=1000) assert len(result) == 1 assert result[0] == "Short text." def test_chunk_text_overlap(): from graphrag.layers.graph_layer import chunk_text text = "A" * 500 + " " + "B" * 500 chunks = chunk_text(text, chunk_size=300, overlap=50) assert len(chunks) >= 3 def test_generate_entity_id_deterministic(): from graphrag.layers.graph_layer import generate_entity_id id1 = generate_entity_id("Albert Einstein", "PERSON") id2 = generate_entity_id("Albert Einstein", "PERSON") assert id1 == id2 def test_generate_entity_id_case_insensitive(): from graphrag.layers.graph_layer import generate_entity_id id1 = generate_entity_id("Albert Einstein", "PERSON") id2 = generate_entity_id("albert einstein", "person") assert id1 == id2 def test_generate_entity_id_different_types(): from graphrag.layers.graph_layer import generate_entity_id id1 = generate_entity_id("Apple", "ORGANIZATION") id2 = generate_entity_id("Apple", "PRODUCT") assert id1 != id2 def test_generate_chunk_id(): from graphrag.layers.graph_layer import generate_chunk_id assert generate_chunk_id("doc1", 0) == "doc1_chunk_0000" assert generate_chunk_id("doc1", 42) == "doc1_chunk_0042" # ── Layer 4: Evaluation Tests ───────────────────────── def test_normalize_answer(): from graphrag.layers.evaluation_layer import normalize_answer assert normalize_answer("The Answer") == "answer" assert normalize_answer(" a big space ") == "big space" assert normalize_answer("Hello, World!") == "hello world" def test_compute_f1_perfect(): from graphrag.layers.evaluation_layer import compute_f1 assert compute_f1("the cat sat", "the cat sat") == 1.0 def test_compute_f1_partial(): from graphrag.layers.evaluation_layer import compute_f1 score = compute_f1("the cat sat on the mat", "the cat sat") assert 0.5 < score < 1.0 def test_compute_f1_no_overlap(): from graphrag.layers.evaluation_layer import compute_f1 assert compute_f1("dogs run fast", "cats 
sit quietly") == 0.0 def test_compute_f1_empty(): from graphrag.layers.evaluation_layer import compute_f1 assert compute_f1("", "") == 1.0 assert compute_f1("something", "") == 0.0 assert compute_f1("", "something") == 0.0 def test_compute_exact_match(): from graphrag.layers.evaluation_layer import compute_exact_match assert compute_exact_match("Yes", "yes") == 1.0 assert compute_exact_match("The answer", "the answer") == 1.0 assert compute_exact_match("Yes", "No") == 0.0 def test_compute_context_hit_rate(): from graphrag.layers.evaluation_layer import compute_context_hit_rate contexts = ["Einstein was born in Germany.", "He developed relativity."] facts = ["Einstein was born in Germany.", "He won Nobel Prize."] rate = compute_context_hit_rate(contexts, facts) assert rate == 0.5 def test_compute_context_hit_rate_empty(): from graphrag.layers.evaluation_layer import compute_context_hit_rate assert compute_context_hit_rate([], []) == 0.0 assert compute_context_hit_rate(["something"], []) == 0.0 def test_compute_token_efficiency(): from graphrag.layers.evaluation_layer import compute_token_efficiency assert compute_token_efficiency(100, 250) == 2.5 assert compute_token_efficiency(100, 50) == 0.5 assert compute_token_efficiency(0, 100) == 0.0 # ── Universal LLM Tests ────────────────────────────── def test_provider_registry_completeness(): from graphrag.layers.universal_llm import PROVIDERS expected = {"openai", "anthropic", "gemini", "mistral", "cohere", "ollama", "openrouter", "groq", "xai", "together", "huggingface", "deepseek"} assert set(PROVIDERS.keys()) == expected def test_provider_has_required_fields(): from graphrag.layers.universal_llm import PROVIDERS for pid, cfg in PROVIDERS.items(): assert "name" in cfg, f"{pid} missing name" assert "default_model" in cfg, f"{pid} missing default_model" assert "litellm_prefix" in cfg, f"{pid} missing litellm_prefix" assert "cost_input" in cfg, f"{pid} missing cost_input" assert "cost_output" in cfg, f"{pid} missing cost_output" def test_ollama_is_free(): from graphrag.layers.universal_llm import PROVIDERS ollama = PROVIDERS["ollama"] assert ollama["cost_input"] == 0 assert ollama["cost_output"] == 0 assert ollama.get("is_local") is True def test_get_available_providers_includes_ollama(): from graphrag.layers.universal_llm import get_available_providers available = get_available_providers() assert "ollama" in available # always included as local # ── Evaluation Layer Aggregate Tests ────────────────── def test_evaluation_layer_aggregate(): from graphrag.layers.evaluation_layer import EvaluationLayer, EvalSample evl = EvaluationLayer() sample = EvalSample( query="test?", reference_answer="yes", baseline_answer="yes", graphrag_answer="yes indeed", question_type="factoid", difficulty="easy", ) evl.evaluate_sample(sample, baseline_tokens=100, graphrag_tokens=200, baseline_cost=0.001, graphrag_cost=0.002) agg = evl.compute_aggregate_metrics() assert agg["num_samples"] == 1 assert agg["baseline"]["avg_f1"] > 0 assert agg["graphrag"]["avg_f1"] > 0 def test_evaluation_layer_report(): from graphrag.layers.evaluation_layer import EvaluationLayer, EvalSample evl = EvaluationLayer() for i in range(3): sample = EvalSample(query=f"q{i}?", reference_answer="answer", baseline_answer="answer", graphrag_answer="answer", question_type="bridge" if i % 2 == 0 else "comparison") evl.evaluate_sample(sample, baseline_tokens=100+i*10, graphrag_tokens=200+i*20) report = evl.generate_report() assert "BENCHMARK REPORT" in report assert "bridge" in report or "comparison" in 
report if __name__ == "__main__": # Run all tests import traceback tests = [v for k, v in sorted(globals().items()) if k.startswith("test_")] passed = failed = 0 for test_fn in tests: try: test_fn() print(f" ✅ {test_fn.__name__}") passed += 1 except Exception as e: print(f" ❌ {test_fn.__name__}: {e}") traceback.print_exc() failed += 1 print(f"\n{'='*50}") print(f"Results: {passed} passed, {failed} failed, {passed+failed} total") if failed == 0: print("🎉 ALL TESTS PASSED!") else: print(f"⚠️ {failed} tests failed")
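

# Usage notes: a minimal sketch of common invocations. The tests/test_units.py
# path is an assumption about where this file lives in the repo.
#
#   python -m pytest tests/test_units.py -v                          # whole file
#   python -m pytest tests/test_units.py::test_compute_f1_perfect   # single test
#
# Or programmatically, via pytest's standard entry point:
#
#   import pytest
#   pytest.main(["-v", __file__])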