File size: 1,393 Bytes
cf5c011
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
"""Tests for src.rag.ingest — walk a directory, chunk, embed, persist."""
from __future__ import annotations

import shutil
from pathlib import Path

import pytest

from src.rag.ingest import ingest_directory
from src.rag.store import FAISSStore


# Sample knowledge base fed to ingest_directory in the tests below; it lives
# under "fixtures/kb_sample" in the directory above this file's directory.
_FIXTURE_KB = Path(__file__).parent.parent.joinpath("fixtures", "kb_sample")


class TestIngestDirectory:
    """Checks that ``ingest_directory`` writes a loadable index to disk."""

    def test_ingests_markdown_files(self, tmp_path: Path) -> None:
        """Ingesting the fixture KB returns a positive count and writes both files."""
        index_dir = tmp_path / "idx"
        chunk_count = ingest_directory(_FIXTURE_KB, index_dir)

        # Expect at least one chunk to come out of the fixture files.
        assert chunk_count > 0
        for artifact in ("index.bin", "chunks.json"):
            assert index_dir.joinpath(artifact).exists()

    def test_loaded_store_is_searchable(self, tmp_path: Path) -> None:
        """A store reloaded from the ingested index is non-empty with well-formed chunks."""
        index_dir = tmp_path / "idx"
        ingest_directory(_FIXTURE_KB, index_dir)

        from src.rag.embed import EMBEDDING_DIM

        reloaded = FAISSStore.load(index_dir, dim=EMBEDDING_DIM)
        assert len(reloaded) > 0

        # Every stored chunk must contain "source" metadata as well as "text".
        for required in ("source", "text"):
            assert all(required in chunk for chunk in reloaded._chunks)

    def test_empty_directory_creates_empty_index(self, tmp_path: Path) -> None:
        """Ingesting an empty directory returns 0 but still writes ``index.bin``."""
        empty_kb = tmp_path / "empty_kb"
        empty_kb.mkdir()
        index_dir = tmp_path / "idx"

        chunk_count = ingest_directory(empty_kb, index_dir)

        assert chunk_count == 0
        assert index_dir.joinpath("index.bin").exists()