"""
ClauseGuard — Contract Q&A Chatbot (RAG) v1.0
═══════════════════════════════════════════════
Architecture:
  User asks question about their contract
          ↓
  [1] Embed question with sentence-transformers (all-MiniLM-L6-v2)
          ↓
  [2] Retrieve top-5 most relevant chunks from contract
          ↓
  [3] Build prompt:
      - System: ClauseGuard analysis results (clauses, entities, risk scores)
      - Context: Retrieved contract chunks (≤2.5K tokens)
      - User question
          ↓
  [4] Stream response from LLM via HF Inference API

Key design:
  • Analyzed data (clauses, entities, risk scores) → system prompt
  • Raw contract text → RAG retrieval
  • This gives the model both structured analysis AND verbatim evidence
"""

import os
import re
import numpy as np

# ── Embedding model (soft-fail) ─────────────────────────────────────
_HAS_EMBEDDER = False
_embedder = None

try:
    from sentence_transformers import SentenceTransformer
    _HAS_EMBEDDER = True
except ImportError:
    pass

# ── HF Inference Client (soft-fail) ─────────────────────────────────
_HAS_INFERENCE = False
_llm_client = None

try:
    from huggingface_hub import InferenceClient
    _HAS_INFERENCE = True
except ImportError:
    pass

# ═══════════════════════════════════════════════════════════════════════
# MODEL LOADING
# ═══════════════════════════════════════════════════════════════════════

_chatbot_status = {"embedder": "not_loaded", "llm": "not_loaded"}

def _load_embedder():
    """Load sentence-transformers embedding model (lazy).
    PERF v4.3: Upgraded from all-MiniLM-L6-v2 to BAAI/bge-small-en-v1.5
    (+21% MTEB retrieval accuracy, same 384-dim, same latency)."""
    global _embedder, _chatbot_status
    if _embedder is not None:
        return _embedder
    if not _HAS_EMBEDDER:
        _chatbot_status["embedder"] = "unavailable"
        return None
    try:
        print("[ClauseGuard Chat] Loading embedding model: BAAI/bge-small-en-v1.5...")
        _embedder = SentenceTransformer("BAAI/bge-small-en-v1.5")
        _chatbot_status["embedder"] = "loaded"
        print("[ClauseGuard Chat] Embedding model loaded (BGE-small, 384-dim)")
        return _embedder
    except Exception as e:
        _chatbot_status["embedder"] = f"failed: {e}"
        print(f"[ClauseGuard Chat] Embedder load failed: {e}")
        return None


def _get_llm_client():
    """Get or create HF Inference Client (lazy)."""
    global _llm_client, _chatbot_status
    if _llm_client is not None:
        return _llm_client
    if not _HAS_INFERENCE:
        _chatbot_status["llm"] = "unavailable"
        return None
    try:
        token = os.environ.get("HF_TOKEN", "")
        _llm_client = InferenceClient(
            provider="hf-inference",
            api_key=token if token else None,
        )
        _chatbot_status["llm"] = "loaded"
        print("[ClauseGuard Chat] HF Inference Client initialized")
        return _llm_client
    except Exception as e:
        _chatbot_status["llm"] = f"failed: {e}"
        print(f"[ClauseGuard Chat] LLM client init failed: {e}")
        return None


def get_chatbot_status():
    """Return human-readable chatbot status."""
    parts = []
    for name, status in _chatbot_status.items():
        icon = "✅" if status == "loaded" else "⚠️" if "failed" in status else "❌"
        label = {"embedder": "Embeddings", "llm": "LLM API"}[name]
        parts.append(f"{icon} {label}: {status}")
    return " · ".join(parts)


# ═══════════════════════════════════════════════════════════════════════
# TEXT CHUNKING (sentence-preserving, ~300 tokens, no overlap)
# ═══════════════════════════════════════════════════════════════════════

def chunk_contract_text(text, target_chunk_size=300, min_chunk_size=50):
    """
    Split contract text into chunks for RAG retrieval.
    Sentence-preserving, ~300 tokens per chunk, 0% overlap.
    Research (arxiv 2601.14123): overlap adds cost with zero benefit.
    """
    if not text:
        return []

    # First split on paragraph boundaries
    paragraphs = re.split(r'\n\n+', text)
    chunks = []
    current_chunk = ""

    for para in paragraphs:
        para = para.strip()
        if not para:
            continue

        # Estimate word count (rough token proxy)
        words_current = len(current_chunk.split())
        words_para = len(para.split())

        if words_current + words_para <= target_chunk_size:
            current_chunk += ("\n\n" + para if current_chunk else para)
        else:
            # Current chunk is full enough — save it
            if words_current >= min_chunk_size:
                chunks.append(current_chunk.strip())
                current_chunk = para
            else:
                # Current chunk too small — need to split the paragraph into sentences
                sentences = re.split(r'(?<=[.!?])\s+(?=[A-Z])', para)
                for sent in sentences:
                    words_current = len(current_chunk.split())
                    words_sent = len(sent.split())
                    if words_current + words_sent <= target_chunk_size:
                        current_chunk += (" " + sent if current_chunk else sent)
                    else:
                        if words_current >= min_chunk_size:
                            chunks.append(current_chunk.strip())
                        current_chunk = sent

    # Don't forget the last chunk
    if current_chunk.strip() and len(current_chunk.split()) >= min_chunk_size:
        chunks.append(current_chunk.strip())

    return chunks


# ═══════════════════════════════════════════════════════════════════════
# EMBEDDING & RETRIEVAL
# ═══════════════════════════════════════════════════════════════════════

def build_embeddings(chunks):
    """
    Embed chunks using sentence-transformers.
    Returns numpy array of shape (N, 384) or None if embedder unavailable.
    """
    embedder = _load_embedder()
    if embedder is None or not chunks:
        return None
    try:
        embeddings = embedder.encode(
            chunks,
            normalize_embeddings=True,
            batch_size=32,
            show_progress_bar=False,
        )
        return embeddings  # numpy array (N, 384)
    except Exception as e:
        print(f"[ClauseGuard Chat] Embedding error: {e}")
        return None


def retrieve_chunks(query, chunks, embeddings, top_k=5):
    """
    Retrieve top-k most relevant chunks for a query.
    Uses cosine similarity (embeddings are L2-normalized → dot product = cosine).
    Context budget: top-5 chunks, ≤2.5K tokens.
    """
    embedder = _load_embedder()
    if embedder is None or embeddings is None or not chunks:
        return []

    try:
        # PERF v4.3: BGE models require query instruction prefix for retrieval
        _BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "
        q_emb = embedder.encode([_BGE_QUERY_PREFIX + query], normalize_embeddings=True)
        scores = (q_emb @ embeddings.T)[0]
        top_indices = np.argsort(scores)[::-1][:top_k]

        results = []
        total_words = 0
        max_words = 600  # ~2.5K tokens budget

        for idx in top_indices:
            chunk = chunks[idx]
            chunk_words = len(chunk.split())
            if total_words + chunk_words > max_words and results:
                break
            results.append({
                "text": chunk,
                "score": float(scores[idx]),
                "index": int(idx),
            })
            total_words += chunk_words

        return results
    except Exception as e:
        print(f"[ClauseGuard Chat] Retrieval error: {e}")
        return []


# ═══════════════════════════════════════════════════════════════════════
# SYSTEM PROMPT BUILDER
# ═══════════════════════════════════════════════════════════════════════

def _build_system_prompt(analysis_result, retrieved_chunks):
    """
    Build the system prompt with:
    1. ClauseGuard analysis results (clauses, entities, risk scores) — NOT through RAG
    2. Retrieved contract chunks — through RAG
    """
    parts = []

    parts.append("""You are ClauseGuard AI, a legal contract analysis assistant. You help users understand their contracts by answering questions based on the contract text and analysis results.

RULES:
- Answer ONLY based on the provided contract text and analysis. Never make up information.
- If the answer isn't in the provided context, say "I don't see that information in the analyzed contract."
- Cite specific clauses or sections when possible.
- Be concise but thorough. Use plain language, not legal jargon.
- Always end with: "⚠️ This is AI analysis, not legal advice. Consult an attorney for legal decisions."
""")

    # Add analysis summary if available
    if analysis_result:
        risk = analysis_result.get("risk", {})
        parts.append(f"""
═══ CONTRACT ANALYSIS SUMMARY ═══
Risk Score: {risk.get('score', 'N/A')}/100 (Grade {risk.get('grade', 'N/A')})
Risk Breakdown: {risk.get('breakdown', {})}
Total Clauses Analyzed: {analysis_result.get('metadata', {}).get('total_clauses', 'N/A')}
Flagged Clauses: {analysis_result.get('metadata', {}).get('flagged_clauses', 'N/A')}
""")

        # Add detected clauses summary
        clauses = analysis_result.get("clauses", [])
        if clauses:
            clause_summary = []
            seen = set()
            for c in clauses:
                key = c["label"]
                if key not in seen:
                    seen.add(key)
                    risk_level = c.get("risk", "LOW")
                    clause_summary.append(f"  • [{risk_level}] {key}: {c.get('description', '')}")
            parts.append("═══ DETECTED CLAUSES ═══\n" + "\n".join(clause_summary[:20]))

        # Add entities summary
        entities = analysis_result.get("entities", [])
        if entities:
            entity_summary = []
            seen = set()
            for e in entities:
                key = f"{e['type']}: {e['text']}"
                if key not in seen and len(seen) < 15:
                    seen.add(key)
                    entity_summary.append(f"  • {e['type']}: {e['text']}")
            parts.append("═══ EXTRACTED ENTITIES ═══\n" + "\n".join(entity_summary))

        # Add contradictions
        contradictions = analysis_result.get("contradictions", [])
        if contradictions:
            contra_summary = []
            for c in contradictions:
                contra_summary.append(f"  • [{c['type']}] {c['explanation']}")
            parts.append("═══ CONTRADICTIONS / ISSUES ═══\n" + "\n".join(contra_summary))

    # Add retrieved contract text
    if retrieved_chunks:
        context_text = "\n---\n".join(c["text"] for c in retrieved_chunks)
        parts.append(f"""
═══ RELEVANT CONTRACT TEXT (Retrieved) ═══
{context_text}
""")

    return "\n\n".join(parts)


# ═══════════════════════════════════════════════════════════════════════
# CHAT RESPONSE (Streaming)
# ═══════════════════════════════════════════════════════════════════════

# LLM model to use
_LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"

def chat_respond(message, history, chunks, embeddings, analysis_result):
    """
    RAG chatbot response function for gr.ChatInterface.
    
    Args:
        message: User's question (str)
        history: Chat history (list of dicts with role/content)
        chunks: Contract text chunks (list of str)
        embeddings: Chunk embeddings (numpy array or None)
        analysis_result: Full analysis result dict (or None)
    
    Yields:
        Partial response string (streaming)
    """
    # Validate inputs
    if not chunks or embeddings is None:
        yield ("⚠️ No contract loaded yet. Please upload and analyze a contract in the "
               "**📄 Single Contract Analysis** tab first, then come back here to ask questions.")
        return

    if not message or not message.strip():
        yield "Please ask a question about your contract."
        return

    # Step 1: Retrieve relevant chunks
    retrieved = retrieve_chunks(message, chunks, embeddings, top_k=5)

    # Step 2: Build system prompt with analysis + retrieved context
    system_prompt = _build_system_prompt(analysis_result, retrieved)

    # Step 3: Build message history for LLM
    messages = [{"role": "system", "content": system_prompt}]

    # Add recent history (last 6 turns to stay in context window)
    if history:
        for h in history[-6:]:
            messages.append({"role": h["role"], "content": h["content"]})

    messages.append({"role": "user", "content": message})

    # Step 4: Stream response from LLM
    client = _get_llm_client()
    if client is None:
        yield ("⚠️ LLM service unavailable. Please ensure `huggingface_hub` is installed "
               "and `HF_TOKEN` is set.")
        return

    try:
        stream = client.chat_completion(
            model=_LLM_MODEL,
            messages=messages,
            max_tokens=1024,
            stream=True,
            temperature=0.3,  # Low temperature for factual responses
        )
        partial = ""
        for chunk in stream:
            token = chunk.choices[0].delta.content or ""
            partial += token
            yield partial
    except Exception as e:
        error_msg = str(e)
        if "rate limit" in error_msg.lower() or "429" in error_msg:
            yield ("⚠️ Rate limit reached on the free HF Inference API. "
                   "Please wait a moment and try again.")
        elif "401" in error_msg or "unauthorized" in error_msg.lower():
            yield ("⚠️ Authentication error. Please set your HF_TOKEN in the Space settings.")
        else:
            yield f"⚠️ Error generating response: {error_msg}\n\nPlease try again."


# ═══════════════════════════════════════════════════════════════════════
# INDEXING HELPER (combines chunking + embedding)
# ═══════════════════════════════════════════════════════════════════════

def index_contract(text):
    """
    Chunk and embed contract text for RAG retrieval.
    
    Returns: (chunks, embeddings, status_message)
        chunks: list of str
        embeddings: numpy array or None
        status_message: str
    """
    if not text or len(text.strip()) < 50:
        return [], None, "⚠️ No contract text to index"

    chunks = chunk_contract_text(text)
    if not chunks:
        return [], None, "⚠️ Could not split contract into chunks"

    embeddings = build_embeddings(chunks)
    if embeddings is None:
        return chunks, None, "⚠️ Embedding model unavailable — chatbot will not work"

    return (
        chunks,
        embeddings,
        f"✅ Indexed {len(chunks)} chunks ({len(text)} chars) — Ready to chat!"
    )