File size: 2,693 Bytes
"""Tests for src.rag.store — FAISS vector store with metadata."""
from __future__ import annotations
from pathlib import Path
import numpy as np
import pytest
from src.rag.store import FAISSStore
def _rand_vecs(n: int, d: int = 4, seed: int = 0) -> np.ndarray:
    """Return a reproducible ``(n, d)`` float32 matrix of standard-normal samples.

    A fresh generator is built from ``seed`` on every call, so calls with
    equal arguments yield equal arrays.
    """
    shape = (n, d)
    return np.random.default_rng(seed).standard_normal(shape, dtype=np.float32)
class TestFAISSStore:
    """Tests for ``FAISSStore``: add, search, save/load, and caller-input safety."""

    def test_add_then_search(self) -> None:
        """Querying with a stored vector returns that vector's chunk as the top hit."""
        store = FAISSStore(dim=4)
        vecs = _rand_vecs(3)
        chunks = [{"text": f"chunk-{i}", "source": "test.md"} for i in range(3)]
        store.add(vecs, chunks)

        results = store.search(vecs[0], k=2)

        assert len(results) == 2
        # the closest hit is the chunk we used as the query (cosine ~1.0)
        top_chunk, top_score = results[0]
        assert top_chunk["text"] == "chunk-0"
        assert top_score > 0.99

    def test_add_size_mismatch_raises(self) -> None:
        """Adding 3 vectors with only 1 chunk raises ``ValueError``."""
        store = FAISSStore(dim=4)
        with pytest.raises(ValueError, match="size mismatch"):
            store.add(_rand_vecs(3), [{"text": "only-one"}])

    def test_search_k_larger_than_corpus(self) -> None:
        """Asking for more hits than stored vectors returns only the stored ones."""
        store = FAISSStore(dim=4)
        store.add(_rand_vecs(2), [{"text": f"c{i}"} for i in range(2)])

        results = store.search(_rand_vecs(1)[0], k=10)

        assert len(results) == 2

    def test_save_load_roundtrip(self, tmp_path: Path) -> None:
        """A store saved to disk and loaded back still finds the original chunk."""
        store = FAISSStore(dim=4)
        vecs = _rand_vecs(3)
        chunks = [{"text": f"chunk-{i}", "source": "test.md"} for i in range(3)]
        store.add(vecs, chunks)
        store.save(tmp_path / "idx")

        restored = FAISSStore.load(tmp_path / "idx", dim=4)
        results = restored.search(vecs[0], k=1)

        assert results[0][0]["text"] == "chunk-0"

    def test_search_on_empty_store_returns_empty(self) -> None:
        """Searching a store with nothing added yields an empty list."""
        store = FAISSStore(dim=4)
        assert store.search(_rand_vecs(1)[0], k=5) == []

    def test_add_does_not_mutate_caller_vectors(self) -> None:
        """``add`` must leave the caller's vector array untouched."""
        store = FAISSStore(dim=4)
        vecs = _rand_vecs(3)
        original = vecs.copy()

        store.add(vecs, [{"text": f"c{i}"} for i in range(3)])

        # Caller's array must be unchanged after add() (faiss.normalize_L2 is in-place).
        # Compare exactly: a tolerance-based check could let a small in-place
        # write slip through, and "unchanged" means bit-for-bit equal.
        assert np.array_equal(vecs, original), "store.add() mutated caller's vectors"

    def test_search_does_not_mutate_caller_query(self) -> None:
        """``search`` must leave the caller's query vector untouched."""
        store = FAISSStore(dim=4)
        store.add(_rand_vecs(3), [{"text": f"c{i}"} for i in range(3)])
        query = _rand_vecs(1)[0]
        original_query = query.copy()

        store.search(query, k=2)

        # Exact comparison for the same reason as in the add() mutation test:
        # any in-place write to the query counts as mutation.
        assert np.array_equal(query, original_query), "store.search() mutated caller's query"
|