muthuk1
/

graphrag-inference-hackathon

Model card Files Files and versions

xet

Community

muthuk1 commited on 12 days ago

Commit

11bcc35

verified ·

1 Parent(s): 47d8a83

Add comprehensive unit tests for core functions

Browse files

Files changed (1) hide show

tests/test_core.py +214 -0

tests/test_core.py ADDED Viewed

	@@ -0,0 +1,214 @@

+"""
+Unit Tests — GraphRAG Inference Hackathon
+==========================================
+Tests for core utility functions across all layers.
+Run: python -m pytest tests/ -v
+"""
+import sys
+import os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+# ── Layer 1: Graph Layer Tests ─────────────────────────
+def test_cosine_similarity_identical():
+    from graphrag.layers.graph_layer import cosine_similarity
+    assert cosine_similarity([1, 0, 0], [1, 0, 0]) == 1.0
+def test_cosine_similarity_orthogonal():
+    from graphrag.layers.graph_layer import cosine_similarity
+    assert cosine_similarity([1, 0, 0], [0, 1, 0]) == 0.0
+def test_cosine_similarity_opposite():
+    from graphrag.layers.graph_layer import cosine_similarity
+    assert abs(cosine_similarity([1, 0], [-1, 0]) - (-1.0)) < 1e-9
+def test_cosine_similarity_zero_vector():
+    from graphrag.layers.graph_layer import cosine_similarity
+    assert cosine_similarity([0, 0, 0], [1, 2, 3]) == 0.0
+def test_cosine_similarity_mismatched_lengths():
+    from graphrag.layers.graph_layer import cosine_similarity
+    assert cosine_similarity([1, 2], [1, 2, 3]) == 0.0
+def test_chunk_text_basic():
+    from graphrag.layers.graph_layer import chunk_text
+    text = "Hello world. " * 100
+    chunks = chunk_text(text, chunk_size=200, overlap=20)
+    assert len(chunks) > 1
+    assert all(len(c) <= 220 for c in chunks)  # allow slight overshoot for sentence boundary
+def test_chunk_text_empty():
+    from graphrag.layers.graph_layer import chunk_text
+    assert chunk_text("") == []
+    assert chunk_text(None) == []
+def test_chunk_text_short():
+    from graphrag.layers.graph_layer import chunk_text
+    result = chunk_text("Short text.", chunk_size=1000)
+    assert len(result) == 1
+    assert result[0] == "Short text."
+def test_chunk_text_overlap():
+    from graphrag.layers.graph_layer import chunk_text
+    text = "A" * 500 + " " + "B" * 500
+    chunks = chunk_text(text, chunk_size=300, overlap=50)
+    assert len(chunks) >= 3
+def test_generate_entity_id_deterministic():
+    from graphrag.layers.graph_layer import generate_entity_id
+    id1 = generate_entity_id("Albert Einstein", "PERSON")
+    id2 = generate_entity_id("Albert Einstein", "PERSON")
+    assert id1 == id2
+def test_generate_entity_id_case_insensitive():
+    from graphrag.layers.graph_layer import generate_entity_id
+    id1 = generate_entity_id("Albert Einstein", "PERSON")
+    id2 = generate_entity_id("albert einstein", "person")
+    assert id1 == id2
+def test_generate_entity_id_different_types():
+    from graphrag.layers.graph_layer import generate_entity_id
+    id1 = generate_entity_id("Apple", "ORGANIZATION")
+    id2 = generate_entity_id("Apple", "PRODUCT")
+    assert id1 != id2
+def test_generate_chunk_id():
+    from graphrag.layers.graph_layer import generate_chunk_id
+    assert generate_chunk_id("doc1", 0) == "doc1_chunk_0000"
+    assert generate_chunk_id("doc1", 42) == "doc1_chunk_0042"
+# ── Layer 4: Evaluation Tests ─────────────────────────
+def test_normalize_answer():
+    from graphrag.layers.evaluation_layer import normalize_answer
+    assert normalize_answer("The Answer") == "answer"
+    assert normalize_answer("  a  big   space  ") == "big space"
+    assert normalize_answer("Hello, World!") == "hello world"
+def test_compute_f1_perfect():
+    from graphrag.layers.evaluation_layer import compute_f1
+    assert compute_f1("the cat sat", "the cat sat") == 1.0
+def test_compute_f1_partial():
+    from graphrag.layers.evaluation_layer import compute_f1
+    score = compute_f1("the cat sat on the mat", "the cat sat")
+    assert 0.5 < score < 1.0
+def test_compute_f1_no_overlap():
+    from graphrag.layers.evaluation_layer import compute_f1
+    assert compute_f1("dogs run fast", "cats sit quietly") == 0.0
+def test_compute_f1_empty():
+    from graphrag.layers.evaluation_layer import compute_f1
+    assert compute_f1("", "") == 1.0
+    assert compute_f1("something", "") == 0.0
+    assert compute_f1("", "something") == 0.0
+def test_compute_exact_match():
+    from graphrag.layers.evaluation_layer import compute_exact_match
+    assert compute_exact_match("Yes", "yes") == 1.0
+    assert compute_exact_match("The answer", "the answer") == 1.0
+    assert compute_exact_match("Yes", "No") == 0.0
+def test_compute_context_hit_rate():
+    from graphrag.layers.evaluation_layer import compute_context_hit_rate
+    contexts = ["Einstein was born in Germany.", "He developed relativity."]
+    facts = ["Einstein was born in Germany.", "He won Nobel Prize."]
+    rate = compute_context_hit_rate(contexts, facts)
+    assert rate == 0.5
+def test_compute_context_hit_rate_empty():
+    from graphrag.layers.evaluation_layer import compute_context_hit_rate
+    assert compute_context_hit_rate([], []) == 0.0
+    assert compute_context_hit_rate(["something"], []) == 0.0
+def test_compute_token_efficiency():
+    from graphrag.layers.evaluation_layer import compute_token_efficiency
+    assert compute_token_efficiency(100, 250) == 2.5
+    assert compute_token_efficiency(100, 50) == 0.5
+    assert compute_token_efficiency(0, 100) == 0.0
+# ── Universal LLM Tests ──────────────────────────────
+def test_provider_registry_completeness():
+    from graphrag.layers.universal_llm import PROVIDERS
+    expected = {"openai", "anthropic", "gemini", "mistral", "cohere",
+                "ollama", "openrouter", "groq", "xai", "together",
+                "huggingface", "deepseek"}
+    assert set(PROVIDERS.keys()) == expected
+def test_provider_has_required_fields():
+    from graphrag.layers.universal_llm import PROVIDERS
+    for pid, cfg in PROVIDERS.items():
+        assert "name" in cfg, f"{pid} missing name"
+        assert "default_model" in cfg, f"{pid} missing default_model"
+        assert "litellm_prefix" in cfg, f"{pid} missing litellm_prefix"
+        assert "cost_input" in cfg, f"{pid} missing cost_input"
+        assert "cost_output" in cfg, f"{pid} missing cost_output"
+def test_ollama_is_free():
+    from graphrag.layers.universal_llm import PROVIDERS
+    ollama = PROVIDERS["ollama"]
+    assert ollama["cost_input"] == 0
+    assert ollama["cost_output"] == 0
+    assert ollama.get("is_local") is True
+def test_get_available_providers_includes_ollama():
+    from graphrag.layers.universal_llm import get_available_providers
+    available = get_available_providers()
+    assert "ollama" in available  # always included as local
+# ── Evaluation Layer Aggregate Tests ──────────────────
+def test_evaluation_layer_aggregate():
+    from graphrag.layers.evaluation_layer import EvaluationLayer, EvalSample
+    evl = EvaluationLayer()
+    sample = EvalSample(
+        query="test?", reference_answer="yes",
+        baseline_answer="yes", graphrag_answer="yes indeed",
+        question_type="factoid", difficulty="easy",
+    )
+    evl.evaluate_sample(sample, baseline_tokens=100, graphrag_tokens=200,
+                        baseline_cost=0.001, graphrag_cost=0.002)
+    agg = evl.compute_aggregate_metrics()
+    assert agg["num_samples"] == 1
+    assert agg["baseline"]["avg_f1"] > 0
+    assert agg["graphrag"]["avg_f1"] > 0
+def test_evaluation_layer_report():
+    from graphrag.layers.evaluation_layer import EvaluationLayer, EvalSample
+    evl = EvaluationLayer()
+    for i in range(3):
+        sample = EvalSample(query=f"q{i}?", reference_answer="answer",
+                            baseline_answer="answer", graphrag_answer="answer",
+                            question_type="bridge" if i % 2 == 0 else "comparison")
+        evl.evaluate_sample(sample, baseline_tokens=100+i*10, graphrag_tokens=200+i*20)
+    report = evl.generate_report()
+    assert "BENCHMARK REPORT" in report
+    assert "bridge" in report or "comparison" in report
+if __name__ == "__main__":
+    # Run all tests
+    import traceback
+    tests = [v for k, v in sorted(globals().items()) if k.startswith("test_")]
+    passed = failed = 0
+    for test_fn in tests:
+        try:
+            test_fn()
+            print(f"  ✅ {test_fn.__name__}")
+            passed += 1
+        except Exception as e:
+            print(f"  ❌ {test_fn.__name__}: {e}")
+            traceback.print_exc()
+            failed += 1
+    print(f"\n{'='*50}")
+    print(f"Results: {passed} passed, {failed} failed, {passed+failed} total")
+    if failed == 0:
+        print("🎉 ALL TESTS PASSED!")
+    else:
+        print(f"⚠️  {failed} tests failed")