Spaces:
Running
Running
| """Simple lexical retriever.""" | |
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
def retrieve(index_file: Path, query: str, top_k: int = 5) -> list[dict]:
    """Return the best-matching documents from a JSON index for *query*.

    The index file is parsed as UTF-8 JSON and iterated as a sequence of
    documents, each expected to be an object with a string ``"text"`` field.
    A document's score is the number of whitespace-separated, lower-cased
    query tokens that occur as substrings of its lower-cased text. A token
    repeated in the query is counted once per repetition.

    Args:
        index_file: Path to the JSON index.
        query: Free-text query; matching is case-insensitive.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` documents with a positive score, highest score
        first; documents with equal scores keep their index order. An empty
        list is returned when the index file is not a regular file, when
        ``top_k`` is not positive, when the query has no tokens, or when
        nothing matches.

    Raises:
        json.JSONDecodeError: If the index file is not valid JSON.
    """
    # A negative top_k would otherwise slice from the end of the ranking
    # (scored[:-1] drops the last hit) instead of returning nothing.
    if top_k <= 0:
        return []

    tokens = query.lower().split()
    # is_file() also rejects directories, which exists() would let through
    # to read_text() and crash.
    if not tokens or not index_file.is_file():
        return []

    docs = json.loads(index_file.read_text(encoding="utf-8"))

    matches = []
    for doc in docs:
        # Skip malformed entries rather than aborting the whole search.
        text = doc.get("text") if isinstance(doc, dict) else None
        if not isinstance(text, str):
            continue
        haystack = text.lower()
        score = sum(1 for token in tokens if token in haystack)
        if score > 0:
            matches.append((score, doc))

    # list.sort is stable, so equal scores stay in index order even with
    # reverse=True.
    matches.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in matches[:top_k]]