hackathon / tests /rag /test_ingest.py
mekosotto's picture
feat(rag): ingest CLI (markdown/PDF → chunks → FAISS) + sample KB fixtures
cf5c011
"""Tests for src.rag.ingest — walk a directory, chunk, embed, persist."""
from __future__ import annotations
import shutil
from pathlib import Path
import pytest
from src.rag.ingest import ingest_directory
from src.rag.store import FAISSStore
# Input directory for the ingest tests: the "fixtures/kb_sample" folder located
# two directory levels above this file.
_FIXTURE_KB = Path(__file__).parent.parent / "fixtures" / "kb_sample"
class TestIngestDirectory:
    """Exercise ``ingest_directory`` and inspect what it writes to disk."""

    def test_ingests_markdown_files(self, tmp_path: Path) -> None:
        """Ingesting the sample KB returns a positive count and writes both artifacts."""
        index_dir = tmp_path / "idx"
        chunk_count = ingest_directory(_FIXTURE_KB, index_dir)

        # Only a non-zero total is checked here, not a per-file minimum.
        assert chunk_count > 0
        for artifact in ("index.bin", "chunks.json"):
            assert (index_dir / artifact).exists()

    def test_loaded_store_is_searchable(self, tmp_path: Path) -> None:
        """A store reloaded from the ingest output is non-empty and keeps chunk metadata."""
        index_dir = tmp_path / "idx"
        ingest_directory(_FIXTURE_KB, index_dir)

        # Imported inside the test, after ingestion, exactly as the test was
        # originally written.
        from src.rag.embed import EMBEDDING_DIM

        reloaded = FAISSStore.load(index_dir, dim=EMBEDDING_DIM)
        assert len(reloaded) > 0

        # Every stored chunk must expose both its origin and its text.
        # NOTE(review): this reads the private ``_chunks`` attribute of the store.
        for required_key in ("source", "text"):
            assert all(required_key in chunk for chunk in reloaded._chunks)

    def test_empty_directory_creates_empty_index(self, tmp_path: Path) -> None:
        """An empty input directory yields a count of zero but still writes index.bin."""
        kb_dir = tmp_path / "empty_kb"
        kb_dir.mkdir()
        index_dir = tmp_path / "idx"

        assert ingest_directory(kb_dir, index_dir) == 0
        assert (index_dir / "index.bin").exists()