"""TDD for spaceutil.encoder.

Requires the real Qwen3 model - marked `model` and skipped by default.
Run with `pytest -m model`.
"""
from __future__ import annotations

import time

import numpy as np
import pytest

# Every test in this module carries the `model` marker.
pytestmark = pytest.mark.model


def test_encode_query_returns_normalized_vector(synthetic_embed_provenance):
    """The encoded query is a 1-D vector of `embedding_dim` entries with unit L2 norm.

    `synthetic_embed_provenance` is not defined in this module; presumably a
    pytest fixture supplied by a conftest - TODO confirm.
    """
    # Imported lazily so that merely importing this module does not import
    # spaceutil.encoder.
    from spaceutil.encoder import encode_query_via_optcg

    vec = encode_query_via_optcg("red blocker with draw", synthetic_embed_provenance)
    arr = np.asarray(vec, dtype=np.float32)

    assert arr.shape == (synthetic_embed_provenance.embedding_dim,)
    norm = float(np.linalg.norm(arr))
    assert abs(norm - 1.0) < 1e-4


def test_paraphrases_are_close(synthetic_embed_provenance):
    """Loose floor - the model is small. We assert > 0.4 cosine, well above random."""
    from spaceutil.encoder import encode_query_via_optcg

    a = np.asarray(
        encode_query_via_optcg("red blocker", synthetic_embed_provenance),
        dtype=np.float32,
    )
    b = np.asarray(
        encode_query_via_optcg("crimson defender", synthetic_embed_provenance),
        dtype=np.float32,
    )

    # Divide by the norms so this is a true cosine similarity even if the
    # encoder output is not exactly unit length; a bare dot product of
    # unnormalized vectors could clear the threshold for the wrong reason.
    cosine = float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
    assert cosine > 0.4, f"paraphrases too distant: cosine={cosine}"


def test_model_cached_across_calls(synthetic_embed_provenance):
    """Second call must reuse the loaded model - the warmup pattern is load-bearing for cold-start UX.

    After an explicit `get_encoder` warmup, two encode calls are timed and the
    second must take less than five times as long as the first.
    """
    from spaceutil.encoder import encode_query_via_optcg, get_encoder

    _ = get_encoder(synthetic_embed_provenance)  # warmup

    # perf_counter() is the clock intended for measuring short durations;
    # time.time() is a wall clock that can be adjusted and may be coarse
    # enough to report a zero-length first call.
    t0 = time.perf_counter()
    _ = encode_query_via_optcg("hello world", synthetic_embed_provenance)
    first = time.perf_counter() - t0

    t0 = time.perf_counter()
    _ = encode_query_via_optcg("goodbye world", synthetic_embed_provenance)
    second = time.perf_counter() - t0

    # The 5x factor leaves room for ordinary timing noise between two calls.
    assert second < first * 5, (
        f"second call ({second:.3f}s) much slower than first ({first:.3f}s) - "
        "model likely re-loaded"
    )