| """Tests for src.rag.ingest — walk a directory, chunk, embed, persist.""" |
| from __future__ import annotations |
|
|
| import shutil |
| from pathlib import Path |
|
|
| import pytest |
|
|
| from src.rag.ingest import ingest_directory |
| from src.rag.store import FAISSStore |
|
|
|
|
# Sample knowledge-base directory used as ingest input by the tests below;
# resolved relative to this file: two directory levels up, then
# fixtures/kb_sample.
_FIXTURE_KB = Path(__file__).parent.parent.joinpath("fixtures", "kb_sample")
|
|
|
|
class TestIngestDirectory:
    """Exercise ``ingest_directory`` end to end against real directories."""

    def test_ingests_markdown_files(self, tmp_path: Path) -> None:
        """Ingesting the fixture KB returns a positive count and writes both files."""
        index_dir = tmp_path / "idx"
        ingested = ingest_directory(_FIXTURE_KB, index_dir)

        assert ingested > 0
        # Both persisted artifacts must be present after a successful run.
        for artifact in ("index.bin", "chunks.json"):
            assert (index_dir / artifact).exists()

    def test_loaded_store_is_searchable(self, tmp_path: Path) -> None:
        """A store reloaded from the output directory is non-empty and keyed."""
        index_dir = tmp_path / "idx"
        ingest_directory(_FIXTURE_KB, index_dir)

        # Imported here rather than at module level, as in the original test.
        from src.rag.embed import EMBEDDING_DIM

        reloaded = FAISSStore.load(index_dir, dim=EMBEDDING_DIM)
        assert len(reloaded) > 0

        # Every stored chunk must carry these keys. This reads the private
        # ``_chunks`` attribute directly; no query is issued against the store.
        for required_key in ("source", "text"):
            assert all(required_key in chunk for chunk in reloaded._chunks)

    def test_empty_directory_creates_empty_index(self, tmp_path: Path) -> None:
        """Ingesting an empty directory returns 0 yet still writes index.bin."""
        kb_dir = tmp_path / "empty_kb"
        kb_dir.mkdir()
        index_dir = tmp_path / "idx"

        ingested = ingest_directory(kb_dir, index_dir)

        assert ingested == 0
        assert (index_dir / "index.bin").exists()
|
|