"""rag_query_subgraph -- dedicated subgraph for the search_documents chat tool.

Topology:
    START → hybrid_search → rerank → format → END

In a LangSmith trace this subgraph shows up expanded (3 nodes), cleanly
separated from the chat agent loop. There is no separate embed_query node in
this module; presumably the query embedding happens inside the store's
``search_hybrid`` call -- TODO confirm. The design follows the
`prototype-agentic` `rag/store.search_hybrid` overlap pattern.
"""
|
|
| from __future__ import annotations |
|
|
| from typing import TypedDict |
|
|
| from langgraph.graph import END, START, StateGraph |
|
|
| from store import HybridStore |
|
|
|
|
class RAGQueryState(TypedDict, total=False):
    """State passed between the nodes of the rag_query subgraph.

    Declared with ``total=False``, so every key is optional.
    """

    # User query text; read by the hybrid-search and rerank nodes.
    query: str
    # Number of hits requested from the store (the search node defaults to 5).
    top_k: int
    # Hits returned by the store's hybrid search.
    raw_hits: list[dict]
    # ``raw_hits`` re-ordered by the rerank node.
    reranked_hits: list[dict]
    # Final human-readable text produced by the format node.
    output: str
|
|
|
|
def _make_hybrid_search_node(store: HybridStore):
    """Build the ``hybrid_search`` graph node bound to *store*.

    Args:
        store: Object exposing an awaitable ``search_hybrid(query, top_k=...)``.

    Returns:
        An async node function that reads ``query`` and ``top_k`` (default 5)
        from the state and returns ``{"raw_hits": [...]}``. An empty or missing
        query yields an empty hit list without calling the store.
    """

    async def hybrid_search_node(state: RAGQueryState) -> dict:
        user_query = state.get("query", "")
        if user_query:
            limit = state.get("top_k", 5)
            found = await store.search_hybrid(user_query, top_k=limit)
        else:
            # Nothing to search for: skip the store round-trip entirely.
            found = []
        return {"raw_hits": found}

    return hybrid_search_node
|
|
|
|
async def rerank_node(state: RAGQueryState) -> dict:
    """Re-rank the search hits with a small keyword-overlap boost.

    The incoming (already fused) ``score`` of each hit is kept as the base; a
    hit then gains up to ``0.1`` in proportion to the fraction of distinct
    query tokens found, case-insensitively and as substrings, in its text.
    This can lift exact matches (e.g. a part number such as "HI-100") above
    near-ties.

    Query tokens are stripped of surrounding sentence punctuation so that a
    keyword ending a question ("... HI-100?") still matches the chunk text.
    Hits whose ``text`` or ``score`` is missing or ``None`` are treated as
    empty text / ``0.0`` instead of raising.

    Args:
        state: Graph state; reads ``raw_hits`` and ``query``.

    Returns:
        ``{"reranked_hits": [...]}`` holding the same, unmodified hit dicts
        sorted by boosted score, highest first (ties keep incoming order).
    """
    raw_hits = state.get("raw_hits") or []
    if not raw_hits:
        return {"reranked_hits": []}

    # Only punctuation that typically surrounds a word in a sentence; symbols
    # that can belong to a term itself (e.g. "+", "#", "-") are left alone.
    edge_punctuation = ".,;:!?\"'()[]{}„”“«»…"
    query = (state.get("query") or "").lower()
    stripped = (word.strip(edge_punctuation) for word in query.split())
    query_tokens = {token for token in stripped if token}

    def boosted_score(hit: dict) -> float:
        text_lower = (hit.get("text") or "").lower()
        # Substring (not whole-word) containment, as before.
        matched = sum(1 for token in query_tokens if token in text_lower)
        # max(1, ...) keeps an empty token set from dividing by zero.
        match_ratio = matched / max(1, len(query_tokens))
        return (hit.get("score") or 0.0) + 0.1 * match_ratio

    return {"reranked_hits": sorted(raw_hits, key=boosted_score, reverse=True)}
|
|
|
|
async def format_node(state: RAGQueryState) -> dict:
    """Render the hits as human-readable text with ``[Forrás: X]`` citations.

    Uses ``reranked_hits`` when it is present and non-empty, otherwise falls
    back to ``raw_hits``. Each hit becomes a header line with its source and
    score (three decimals) followed by the first 200 characters of its text,
    with ``...`` appended when the text was truncated. Entries are separated
    by a ``---`` rule.

    Missing or ``None`` ``metadata``, ``source``, ``score`` and ``text`` fall
    back to ``{}``, ``"ismeretlen"``, ``0.0`` and ``""`` respectively, so a
    sparse hit cannot break formatting.

    Args:
        state: Graph state; reads ``reranked_hits`` and ``raw_hits``.

    Returns:
        ``{"output": <text>}``; a fixed "no relevant hits" message when there
        are no hits at all.
    """
    hits = state.get("reranked_hits") or state.get("raw_hits") or []
    if not hits:
        return {"output": "Nem találtam releváns találatot a feltöltött dokumentumokban."}

    entries: list[str] = []
    for hit in hits:
        meta = hit.get("metadata") or {}
        source = meta.get("source") or "ismeretlen"
        score = hit.get("score") or 0.0
        text = hit.get("text") or ""
        # Keep the tool output compact: at most 200 characters per hit.
        snippet = text[:200] + ("..." if len(text) > 200 else "")
        entries.append(f"[Forrás: {source}, relevancia: {score:.3f}]\n{snippet}")

    return {"output": "\n\n---\n\n".join(entries)}
|
|
|
|
def build_rag_query_subgraph(store: HybridStore):
    """Assemble and compile the rag_query subgraph.

    The graph is a straight line:
    ``START → hybrid_search → rerank → format → END``.

    Args:
        store: Store handed to the hybrid-search node factory.

    Returns:
        The compiled graph, as returned by ``StateGraph.compile()``.
    """
    builder = StateGraph(RAGQueryState)

    # Register the nodes first, in pipeline order.
    node_table = (
        ("hybrid_search", _make_hybrid_search_node(store)),
        ("rerank", rerank_node),
        ("format", format_node),
    )
    for node_name, node_fn in node_table:
        builder.add_node(node_name, node_fn)

    # Then chain each stage to the next one along the linear path.
    path = (START, "hybrid_search", "rerank", "format", END)
    for source, target in zip(path, path[1:]):
        builder.add_edge(source, target)

    return builder.compile()
|
|