TheJackBright's picture
Deploy PolyGuard OpenEnv Space
877add7 verified
"""Simple lexical retriever."""
from __future__ import annotations
import json
from pathlib import Path
def retrieve(index_file: Path, query: str, top_k: int = 5) -> list[dict]:
    """Return up to *top_k* documents from a JSON index that match *query*.

    The index file is read as UTF-8 JSON and iterated as a sequence of
    documents. Each document is scored by how many whitespace-separated
    query tokens occur as case-insensitive substrings of its ``"text"``
    field (a token repeated in the query is counted each time it appears).
    Documents with a score of zero are never returned.

    Args:
        index_file: Path to the JSON index.
        query: Free-text query; split on whitespace and lower-cased.
        top_k: Maximum number of documents to return.

    Returns:
        Matching documents ordered by descending score; ties keep the order
        in which they appear in the index. An empty list is returned when
        the index is not a regular file, the query has no tokens, or
        ``top_k`` is not positive.

    Raises:
        json.JSONDecodeError: If the index file is not valid JSON.
    """
    # A non-positive top_k must yield nothing; a negative value fed to a
    # slice would instead drop items from the *end* of the ranking.
    # is_file() (rather than exists()) also rejects directories, which
    # cannot be read as text.
    if top_k <= 0 or not index_file.is_file():
        return []

    tokens = query.lower().split()
    if not tokens:
        # No tokens means every score would be zero; skip the file read.
        return []

    docs = json.loads(index_file.read_text(encoding="utf-8"))

    scored = []
    for doc in docs:
        # Entries without a string "text" field cannot match any token, so
        # skip them instead of aborting the whole lookup.
        text = doc.get("text") if isinstance(doc, dict) else None
        if not isinstance(text, str):
            continue
        haystack = text.lower()
        score = sum(1 for token in tokens if token in haystack)
        if score > 0:
            scored.append((score, doc))

    # list.sort is stable, so equal scores stay in index order.
    scored.sort(key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in scored[:top_k]]