vn6295337's picture
Initial commit: RAG Document Assistant with Zero-Storage Privacy
f866820
"""
Structured prompt templates for RAG pipeline.
This module implements a layered prompt architecture:
- SYSTEM: Role and behavior rules (fixed)
- DEVELOPER: Grounding rules and constraints (RAG-specific)
- CONTEXT: Retrieved chunks (dynamic)
- USER: Query (dynamic)
"""
from typing import List, Dict, Any
# Layer 1: System prompt - defines the assistant's role and core behavior
SYSTEM_PROMPT = """You are a document assistant. You answer questions using ONLY the provided context.
You are precise, concise, and always cite your sources."""
# Layer 2: Developer prompt - RAG-specific grounding rules and output format
DEVELOPER_PROMPT = """## Grounding Rules
- Answer ONLY using information from the CONTEXT section below
- If the context doesn't contain enough information to answer, say "I don't have enough information to answer this based on the provided documents"
- NEVER fabricate or infer information not explicitly present in the context
- Keep answers under 3 paragraphs unless the query explicitly asks for detail
## Citation Format
- Cite sources inline using [ID:chunk_id] format immediately after the relevant claim
- Every factual claim MUST have a citation
- At the end of your answer, list all cited chunks under "Sources:" with their IDs"""
# Layer 3: Context template - structured container for retrieved chunks
CONTEXT_TEMPLATE = """## Context Chunks
The following are relevant excerpts from the document corpus, ranked by relevance:
{chunks}"""
# Layer 4: User query template
USER_TEMPLATE = """## Question
{query}"""
def format_chunk(chunk: Dict[str, Any], index: int) -> str:
"""
Format a single chunk with clear boundaries and metadata.
Args:
chunk: Dict with 'id', 'text', and optionally 'score', 'metadata' keys
index: Chunk position (for display)
Returns:
Formatted chunk string
"""
chunk_id = chunk.get("id", f"chunk_{index}")
score = chunk.get("score")
text = chunk.get("text", "")
# Build score line only if score exists
score_line = f"Relevance Score: {score:.3f}" if score is not None else ""
formatted = f"""---
Chunk ID: {chunk_id}
{score_line}
Content:
{text}
---"""
return formatted
def build_rag_prompt(
query: str,
chunks: List[Dict[str, Any]],
k: int = 5,
include_scores: bool = True
) -> str:
"""
Build a structured RAG prompt from query and retrieved chunks.
The prompt follows a clear hierarchy:
1. SYSTEM - Role definition
2. DEVELOPER - Grounding rules and format constraints
3. CONTEXT - Retrieved document chunks
4. USER - The actual query
Args:
query: User's question
chunks: List of dicts with 'id', 'text', and optionally 'score' keys
k: Maximum number of chunks to include
include_scores: Whether to show relevance scores
Returns:
Formatted prompt string ready for LLM
"""
# Format each chunk with clear boundaries
chunk_strs = []
for i, chunk in enumerate(chunks[:k]):
if not include_scores:
# Remove score for cleaner output if desired
chunk_copy = {k: v for k, v in chunk.items() if k != "score"}
chunk_strs.append(format_chunk(chunk_copy, i))
else:
chunk_strs.append(format_chunk(chunk, i))
formatted_chunks = "\n\n".join(chunk_strs) if chunk_strs else "(No relevant context found)"
# Assemble full prompt with clear section markers
prompt = f"""{SYSTEM_PROMPT}
{DEVELOPER_PROMPT}
{CONTEXT_TEMPLATE.format(chunks=formatted_chunks)}
{USER_TEMPLATE.format(query=query)}
## Answer"""
return prompt