# PaperBrainAI — backend/app/tools/tool_rag_qa.py
# Initial deploy (commit 99b596a)
import os
from huggingface_hub import InferenceClient
HF_TOKEN = os.getenv("HF_TOKEN", "")
MODEL_NAME = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
RELEVANCE_THRESHOLD = 0.4
_client = None
def _get_client() -> InferenceClient:
global _client
if _client is None:
_client = InferenceClient(token=HF_TOKEN or None)
return _client
def _call_hf(system: str, user: str, max_tokens: int = 1024, temperature: float = 0.4) -> str:
client = _get_client()
response = client.chat_completion(
model=MODEL_NAME,
messages=[
{"role": "system", "content": system},
{"role": "user", "content": user},
],
max_tokens=max_tokens,
temperature=temperature,
)
return response.choices[0].message.content.strip()
def rag_qa(query: str, history_text: str = "") -> tuple[str, list[str]]:
from app.rag import query_documents
results = query_documents(query, n_results=3)
documents = results.get("documents", [[]])[0]
metadatas = results.get("metadatas", [[]])[0]
distances = results.get("distances", [[]])[0]
relevant_docs = [
(doc, meta)
for doc, meta, dist in zip(documents, metadatas, distances)
if dist < RELEVANCE_THRESHOLD
]
if not relevant_docs:
return ("Je n'ai pas trouvé d'information pertinente dans vos cours.", [])
context = "\n\n---\n\n".join([doc for doc, _ in relevant_docs])
sources = list(set([meta.get("source", "inconnu") for _, meta in relevant_docs]))
system = (
"Tu es un assistant pédagogique RAG. "
"Réponds à la question en te basant UNIQUEMENT sur le contexte fourni. "
"Si la réponse n'est pas dans le contexte, dis-le clairement. "
"Réponds dans la même langue que la question."
)
history_section = f"Historique:\n{history_text}\n\n" if history_text else ""
user = f"{history_section}Contexte :\n{context[:3000]}\n\nQuestion : {query}"
answer = _call_hf(system, user)
return answer, sources