File size: 3,148 Bytes
7ff7119 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 | """rag_query_subgraph -- a search_documents chat tool dedikált subgraph-ja.
Topology:
hybrid_search → rerank → format → END
In a LangSmith trace this subgraph shows up expanded (3 nodes; the compiled graph
has no separate embed_query node), cleanly separated from the chat agent loop.
We follow the overlapping pattern of `prototype-agentic`'s `rag/store.search_hybrid`.
"""
from __future__ import annotations
from typing import TypedDict
from langgraph.graph import END, START, StateGraph
from store import HybridStore
class RAGQueryState(TypedDict, total=False):
    """State shared by the nodes of the rag_query subgraph.

    Declared with ``total=False``, so every key is optional; the nodes in this
    module read them with ``state.get(...)``.
    """

    # Search text; read by hybrid_search_node and rerank_node.
    query: str
    # Number of hits to request; hybrid_search_node falls back to 5 when absent.
    top_k: int
    # Hits returned by store.search_hybrid; written by hybrid_search_node.
    raw_hits: list[dict]
    # raw_hits re-sorted by the keyword-overlap boost; written by rerank_node.
    reranked_hits: list[dict]
    # Final human-readable text; written by format_node.
    output: str
def _make_hybrid_search_node(store: HybridStore):
    """Build the ``hybrid_search`` node bound to *store*.

    Args:
        store: Object exposing an awaitable ``search_hybrid(query, top_k=...)``.

    Returns:
        An async node function that maps the graph state to
        ``{"raw_hits": [...]}``.
    """
    default_top_k = 5

    async def hybrid_search_node(state: RAGQueryState) -> dict:
        """Run the hybrid search for ``state["query"]``.

        A missing, ``None`` or whitespace-only query short-circuits to an
        empty hit list without touching the store.
        """
        # `or ""` also covers a key that is present but holds None, which a
        # plain .get() default would let through.
        query = state.get("query") or ""
        if not query.strip():
            return {"raw_hits": []}

        # Same reasoning for top_k: an explicit None must not reach the store.
        top_k = state.get("top_k")
        if top_k is None:
            top_k = default_top_k

        hits = await store.search_hybrid(query, top_k=top_k)
        return {"raw_hits": hits}

    return hybrid_search_node
async def rerank_node(state: RAGQueryState) -> dict:
    """Re-rank ``raw_hits`` with a simple keyword-overlap boost.

    Each hit's sort key is its ``score`` plus ``0.1 *`` the fraction of
    distinct, lower-cased, whitespace-separated query tokens that occur as
    substrings of the hit's lower-cased ``text``. The original author's note:
    the incoming ranking is already an RRF fusion, but the boost can pull
    exact matches forward (e.g. a part number such as "HI-100" appearing
    verbatim in the chunk).

    Args:
        state: Graph state; reads ``raw_hits`` and ``query``.

    Returns:
        ``{"reranked_hits": [...]}`` sorted by boosted score, highest first.
        The hit dicts themselves are not modified. Ties keep their original
        relative order because ``sorted`` is stable.
    """
    raw = state.get("raw_hits") or []
    if not raw:
        return {"reranked_hits": []}

    # `or` fallbacks (instead of .get defaults) also cover keys that are
    # present but hold None, which would otherwise raise on .lower() / "+".
    query_tokens = set((state.get("query") or "").lower().split())
    # Guard against division by zero when the query has no tokens.
    token_count = max(1, len(query_tokens))

    def boost(hit: dict) -> float:
        text_lower = (hit.get("text") or "").lower()
        token_hits = sum(1 for token in query_tokens if token in text_lower)
        return (hit.get("score") or 0.0) + 0.1 * (token_hits / token_count)

    return {"reranked_hits": sorted(raw, key=boost, reverse=True)}
async def format_node(state: RAGQueryState) -> dict:
    """Render the hits as human-readable text with ``[Forrás: X]`` citations.

    Uses ``reranked_hits`` when non-empty, otherwise falls back to
    ``raw_hits``. Each hit becomes a header line with its source and score
    (three decimals) followed by a snippet of at most 200 characters of its
    text; hits are separated by a ``---`` rule.

    Args:
        state: Graph state; reads ``reranked_hits`` and ``raw_hits``.

    Returns:
        ``{"output": str}``: the formatted hits, or a fixed "nothing found"
        message when there are no hits.
    """
    hits = state.get("reranked_hits") or state.get("raw_hits") or []
    if not hits:
        return {"output": "Nem találtam releváns találatot a feltöltött dokumentumokban."}

    max_snippet_chars = 200
    blocks: list[str] = []
    for hit in hits:
        # `or` fallbacks also cover keys that are present but hold None, which
        # would otherwise break the slice or the :.3f format below.
        meta = hit.get("metadata") or {}
        source = meta.get("source") or "ismeretlen"
        score = hit.get("score") or 0.0
        text = hit.get("text") or ""
        # Quote only the head of the chunk; mark truncation with an ellipsis.
        snippet = text[:max_snippet_chars] + ("..." if len(text) > max_snippet_chars else "")
        blocks.append(f"[Forrás: {source}, relevancia: {score:.3f}]\n{snippet}")

    return {"output": "\n\n---\n\n".join(blocks)}
def build_rag_query_subgraph(store: HybridStore):
    """Assemble and compile the rag_query subgraph.

    Registers the ``hybrid_search`` (bound to *store*), ``rerank`` and
    ``format`` nodes, wires them into a single linear path from START to END,
    and returns the compiled graph.
    """
    builder = StateGraph(RAGQueryState)

    # Nodes in execution order.
    nodes = (
        ("hybrid_search", _make_hybrid_search_node(store)),
        ("rerank", rerank_node),
        ("format", format_node),
    )
    for node_name, node_fn in nodes:
        builder.add_node(node_name, node_fn)

    # Linear pipeline: connect each stage to the one that follows it.
    path = (START, "hybrid_search", "rerank", "format", END)
    for src, dst in zip(path, path[1:]):
        builder.add_edge(src, dst)

    return builder.compile()
|