muthuk1 committed on
Commit
11bcc35
·
verified ·
1 Parent(s): 47d8a83

Add comprehensive unit tests for core functions

Browse files
Files changed (1) hide show
  1. tests/test_core.py +214 -0
tests/test_core.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Unit Tests — GraphRAG Inference Hackathon
3
+ ==========================================
4
+ Tests for core utility functions across all layers.
5
+ Run: python -m pytest tests/ -v
6
+ """
7
+ import sys
8
+ import os
9
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
10
+
11
+
12
+ # ── Layer 1: Graph Layer Tests ─────────────────────────
13
+
14
def test_cosine_similarity_identical():
    """Two equal vectors are expected to score exactly 1.0."""
    from graphrag.layers.graph_layer import cosine_similarity

    left = [1, 0, 0]
    right = [1, 0, 0]
    similarity = cosine_similarity(left, right)
    assert similarity == 1.0
17
+
18
def test_cosine_similarity_orthogonal():
    """Perpendicular unit vectors are expected to score exactly 0.0."""
    from graphrag.layers.graph_layer import cosine_similarity

    x_axis = [1, 0, 0]
    y_axis = [0, 1, 0]
    similarity = cosine_similarity(x_axis, y_axis)
    assert similarity == 0.0
21
+
22
def test_cosine_similarity_opposite():
    """Vectors pointing in opposite directions should score -1.0 (within 1e-9)."""
    from graphrag.layers.graph_layer import cosine_similarity

    similarity = cosine_similarity([1, 0], [-1, 0])
    deviation = abs(similarity - (-1.0))
    assert deviation < 1e-9
25
+
26
def test_cosine_similarity_zero_vector():
    """An all-zero vector is expected to produce 0.0 rather than an error."""
    from graphrag.layers.graph_layer import cosine_similarity

    zeros = [0, 0, 0]
    other = [1, 2, 3]
    similarity = cosine_similarity(zeros, other)
    assert similarity == 0.0
29
+
30
def test_cosine_similarity_mismatched_lengths():
    """Vectors of different lengths are expected to produce 0.0."""
    from graphrag.layers.graph_layer import cosine_similarity

    shorter = [1, 2]
    longer = [1, 2, 3]
    similarity = cosine_similarity(shorter, longer)
    assert similarity == 0.0
33
+
34
def test_chunk_text_basic():
    """Long input must split into several chunks, each at most 220 characters."""
    from graphrag.layers.graph_layer import chunk_text

    document = "Hello world. " * 100
    pieces = chunk_text(document, chunk_size=200, overlap=20)

    assert len(pieces) > 1
    # chunk_size is 200; the extra 20 characters of slack allow a slight
    # overshoot at a sentence boundary.
    for piece in pieces:
        assert len(piece) <= 220
40
+
41
def test_chunk_text_empty():
    """Both the empty string and None are expected to yield an empty list."""
    from graphrag.layers.graph_layer import chunk_text

    for blank_input in ("", None):
        assert chunk_text(blank_input) == []
45
+
46
def test_chunk_text_short():
    """Text shorter than chunk_size should come back as one unchanged chunk."""
    from graphrag.layers.graph_layer import chunk_text

    pieces = chunk_text("Short text.", chunk_size=1000)

    assert len(pieces) == 1
    only_piece = pieces[0]
    assert only_piece == "Short text."
51
+
52
def test_chunk_text_overlap():
    """A 1001-character input chunked at 300 with overlap 50 gives >= 3 chunks."""
    from graphrag.layers.graph_layer import chunk_text

    # Two 500-character runs separated by a single space.
    document = " ".join(("A" * 500, "B" * 500))
    pieces = chunk_text(document, chunk_size=300, overlap=50)

    assert len(pieces) >= 3
57
+
58
def test_generate_entity_id_deterministic():
    """Calling generate_entity_id twice with the same arguments gives equal IDs."""
    from graphrag.layers.graph_layer import generate_entity_id

    first_call = generate_entity_id("Albert Einstein", "PERSON")
    second_call = generate_entity_id("Albert Einstein", "PERSON")

    assert first_call == second_call
63
+
64
def test_generate_entity_id_case_insensitive():
    """Letter case in the name and the type must not affect the generated ID."""
    from graphrag.layers.graph_layer import generate_entity_id

    mixed_case_id = generate_entity_id("Albert Einstein", "PERSON")
    lower_case_id = generate_entity_id("albert einstein", "person")

    assert mixed_case_id == lower_case_id
69
+
70
def test_generate_entity_id_different_types():
    """The same name under two entity types must produce two distinct IDs."""
    from graphrag.layers.graph_layer import generate_entity_id

    as_organization = generate_entity_id("Apple", "ORGANIZATION")
    as_product = generate_entity_id("Apple", "PRODUCT")

    assert as_organization != as_product
75
+
76
def test_generate_chunk_id():
    """Chunk IDs are expected as '<doc>_chunk_<index zero-padded to 4 digits>'."""
    from graphrag.layers.graph_layer import generate_chunk_id

    expectations = (
        (0, "doc1_chunk_0000"),
        (42, "doc1_chunk_0042"),
    )
    for index, expected_id in expectations:
        assert generate_chunk_id("doc1", index) == expected_id
80
+
81
+
82
+ # ── Layer 4: Evaluation Tests ─────────────────────────
83
+
84
def test_normalize_answer():
    """Check normalize_answer output for three sample inputs.

    The expected values show lowercasing, removal of "the"/"a", trimming of
    surrounding whitespace and stripping of punctuation.
    """
    from graphrag.layers.evaluation_layer import normalize_answer

    cases = (
        ("The Answer", "answer"),
        (" a big space ", "big space"),
        ("Hello, World!", "hello world"),
    )
    for raw, normalized in cases:
        assert normalize_answer(raw) == normalized
89
+
90
def test_compute_f1_perfect():
    """Identical prediction and reference are expected to score exactly 1.0."""
    from graphrag.layers.evaluation_layer import compute_f1

    score = compute_f1("the cat sat", "the cat sat")
    assert score == 1.0
93
+
94
def test_compute_f1_partial():
    """A partially matching pair must score strictly between 0.5 and 1.0."""
    from graphrag.layers.evaluation_layer import compute_f1

    partial_score = compute_f1("the cat sat on the mat", "the cat sat")

    assert 0.5 < partial_score
    assert partial_score < 1.0
98
+
99
def test_compute_f1_no_overlap():
    """Strings with no words in common are expected to score exactly 0.0."""
    from graphrag.layers.evaluation_layer import compute_f1

    score = compute_f1("dogs run fast", "cats sit quietly")
    assert score == 0.0
102
+
103
def test_compute_f1_empty():
    """Empty-string handling: both empty gives 1.0, exactly one empty gives 0.0."""
    from graphrag.layers.evaluation_layer import compute_f1

    cases = (
        ("", "", 1.0),
        ("something", "", 0.0),
        ("", "something", 0.0),
    )
    for first, second, expected_score in cases:
        assert compute_f1(first, second) == expected_score
108
+
109
def test_compute_exact_match():
    """Case differences still count as a match (1.0); different answers give 0.0."""
    from graphrag.layers.evaluation_layer import compute_exact_match

    cases = (
        ("Yes", "yes", 1.0),
        ("The answer", "the answer", 1.0),
        ("Yes", "No", 0.0),
    )
    for first, second, expected_score in cases:
        assert compute_exact_match(first, second) == expected_score
114
+
115
def test_compute_context_hit_rate():
    """With one of two facts present in the contexts, the hit rate must be 0.5."""
    from graphrag.layers.evaluation_layer import compute_context_hit_rate

    retrieved = [
        "Einstein was born in Germany.",
        "He developed relativity.",
    ]
    supporting_facts = [
        "Einstein was born in Germany.",
        "He won Nobel Prize.",
    ]

    hit_rate = compute_context_hit_rate(retrieved, supporting_facts)
    assert hit_rate == 0.5
121
+
122
def test_compute_context_hit_rate_empty():
    """An empty fact list must give a hit rate of 0.0, with or without contexts."""
    from graphrag.layers.evaluation_layer import compute_context_hit_rate

    for contexts in ([], ["something"]):
        assert compute_context_hit_rate(contexts, []) == 0.0
126
+
127
def test_compute_token_efficiency():
    """Check compute_token_efficiency on three pairs, including a zero first argument."""
    from graphrag.layers.evaluation_layer import compute_token_efficiency

    cases = (
        (100, 250, 2.5),
        (100, 50, 0.5),
        (0, 100, 0.0),
    )
    for first_count, second_count, expected in cases:
        assert compute_token_efficiency(first_count, second_count) == expected
132
+
133
+
134
+ # ── Universal LLM Tests ──────────────────────────────
135
+
136
def test_provider_registry_completeness():
    """PROVIDERS must contain exactly the twelve provider ids listed below."""
    from graphrag.layers.universal_llm import PROVIDERS

    expected_ids = {
        "openai",
        "anthropic",
        "gemini",
        "mistral",
        "cohere",
        "ollama",
        "openrouter",
        "groq",
        "xai",
        "together",
        "huggingface",
        "deepseek",
    }
    registered_ids = set(PROVIDERS.keys())
    assert registered_ids == expected_ids
142
+
143
def test_provider_has_required_fields():
    """Every provider config must define the five required keys."""
    from graphrag.layers.universal_llm import PROVIDERS

    required_fields = (
        "name",
        "default_model",
        "litellm_prefix",
        "cost_input",
        "cost_output",
    )
    for pid, cfg in PROVIDERS.items():
        for field in required_fields:
            # The message names the provider and the missing key.
            assert field in cfg, f"{pid} missing {field}"
151
+
152
def test_ollama_is_free():
    """The ollama entry must have zero input/output cost and is_local set to True."""
    from graphrag.layers.universal_llm import PROVIDERS

    config = PROVIDERS["ollama"]

    assert config["cost_input"] == 0
    assert config["cost_output"] == 0
    local_flag = config.get("is_local")
    assert local_flag is True
158
+
159
def test_get_available_providers_includes_ollama():
    """The local 'ollama' provider is expected to always be listed as available."""
    from graphrag.layers.universal_llm import get_available_providers

    providers = get_available_providers()
    assert "ollama" in providers
163
+
164
+
165
+ # ── Evaluation Layer Aggregate Tests ──────────────────
166
+
167
def test_evaluation_layer_aggregate():
    """After one evaluated sample, the aggregate reports it with positive F1 values."""
    from graphrag.layers.evaluation_layer import EvaluationLayer, EvalSample

    evaluator = EvaluationLayer()
    single_sample = EvalSample(
        query="test?",
        reference_answer="yes",
        baseline_answer="yes",
        graphrag_answer="yes indeed",
        question_type="factoid",
        difficulty="easy",
    )
    evaluator.evaluate_sample(
        single_sample,
        baseline_tokens=100,
        graphrag_tokens=200,
        baseline_cost=0.001,
        graphrag_cost=0.002,
    )

    metrics = evaluator.compute_aggregate_metrics()

    assert metrics["num_samples"] == 1
    assert metrics["baseline"]["avg_f1"] > 0
    assert metrics["graphrag"]["avg_f1"] > 0
181
+
182
def test_evaluation_layer_report():
    """The report for three samples has the banner and at least one question type."""
    from graphrag.layers.evaluation_layer import EvaluationLayer, EvalSample

    evaluator = EvaluationLayer()
    for i in range(3):
        # Even-indexed samples are "bridge" questions, odd ones "comparison".
        if i % 2 == 0:
            kind = "bridge"
        else:
            kind = "comparison"
        sample = EvalSample(
            query=f"q{i}?",
            reference_answer="answer",
            baseline_answer="answer",
            graphrag_answer="answer",
            question_type=kind,
        )
        evaluator.evaluate_sample(
            sample,
            baseline_tokens=100 + i * 10,
            graphrag_tokens=200 + i * 20,
        )

    report_text = evaluator.generate_report()

    assert "BENCHMARK REPORT" in report_text
    assert "bridge" in report_text or "comparison" in report_text
193
+
194
+
195
if __name__ == "__main__":
    # Fallback runner for executing this file directly, without pytest.
    # It calls every module-level name starting with "test_" in sorted order,
    # prints a per-test result line and a summary, and exits with a non-zero
    # status when any test failed so shell scripts and CI can detect it.
    import traceback

    tests = [fn for name, fn in sorted(globals().items()) if name.startswith("test_")]
    passed = failed = 0
    for test_fn in tests:
        try:
            test_fn()
        except Exception as e:
            # AssertionError is an Exception subclass, so failed asserts land
            # here together with import errors and other crashes.
            print(f" ❌ {test_fn.__name__}: {e}")
            traceback.print_exc()
            failed += 1
        else:
            print(f" ✅ {test_fn.__name__}")
            passed += 1
    print(f"\n{'='*50}")
    print(f"Results: {passed} passed, {failed} failed, {passed+failed} total")
    if failed == 0:
        print("🎉 ALL TESTS PASSED!")
    else:
        print(f"⚠️ {failed} tests failed")
        # Previously the script ended with status 0 even on failure.
        raise SystemExit(1)