""" Bioinformatics with BB Tutor — Complete Application A production bioinformatics teaching assistant with 7 modules. Architecture: - Backend: LLMService (HuggingFace InferenceClient), RAGService (sentence-transformers), DocumentParser (PyMuPDF + text), knowledge_base (domain content) - Frontend: 7 Gradio tabs with ChatInterface, file upload, quiz generation, lesson building - Data flow: User query → RAG retrieval → LLM with context → streaming response - Shared state: rag_store (gr.State) holds uploaded document chunks + embeddings across tabs """ import gradio as gr import numpy as np import os from pathlib import Path # ── Conditional imports with fallbacks ──────────────────────────────────────── try: import fitz # PyMuPDF HAS_FITZ = True except ImportError: HAS_FITZ = False print("Warning: PyMuPDF not available. PDF parsing disabled.") try: from sentence_transformers import SentenceTransformer HAS_ST = True except ImportError: HAS_ST = False print("Warning: sentence-transformers not available. Embedding search disabled.") try: from huggingface_hub import InferenceClient HAS_HF = True except ImportError: HAS_HF = False print("Warning: huggingface_hub not available. LLM service disabled.") # ── Import knowledge base ──────────────────────────────────────────────────── from knowledge_base import ( DOMAIN_TAXONOMY, WORKFLOWS, GLOSSARY, COMMON_MISCONCEPTIONS, SYSTEM_PROMPTS, QUIZ_TEMPLATES, LESSON_TEMPLATE, TOPIC_CHOICES, DIFFICULTY_LEVELS, WORKFLOW_CHOICES ) # ============================================================================ # CONFIGURATION # ============================================================================ LLM_MODEL = os.environ.get("LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3") EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2" HF_TOKEN = os.environ.get("HF_TOKEN", None) CHUNK_SIZE = 400 CHUNK_OVERLAP = 60 TOP_K_RETRIEVAL = 3 DEFAULT_SYSTEM_PROMPT = SYSTEM_PROMPTS["ask_tutor"] DEFAULT_RAG_STATE = {"chunks": [], "embeddings": None} # ============================================================================ # BACKEND SERVICES — Singleton Pattern # ============================================================================ class LLMService: """Lazy-initialized LLM inference service.""" _instance = None _initialized = False def __new__(cls): if cls._instance is None: cls._instance = super().__new__(cls) return cls._instance def __init__(self): if LLMService._initialized: return LLMService._initialized = True self.client = None self._try_init() def _try_init(self): if not HAS_HF: print("LLMService: huggingface_hub not available") return if not HF_TOKEN: print("LLMService: HF_TOKEN not set in environment") return try: self.client = InferenceClient( model=LLM_MODEL, token=HF_TOKEN, timeout=120, ) print("LLMService: Initialized successfully") except Exception as e: print(f"LLMService: Failed to initialize: {e}") self.client = None def is_available(self): return self.client is not None def stream_chat(self, messages, temperature=0.7, max_tokens=1024): """Stream chat completion. 
Yields partial response strings.""" if not self.is_available(): yield self._fallback_response(messages) return try: partial = "" for chunk in self.client.chat_completion( messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stream=True, ): token = "" if hasattr(chunk, 'choices') and chunk.choices: choice = chunk.choices[0] if hasattr(choice, 'delta') and hasattr(choice.delta, 'content'): token = choice.delta.content or "" partial += token yield partial except Exception as e: print(f"LLM stream error: {e}") yield f"⚠️ LLM API error: {str(e)}\n\nPlease check your HF_TOKEN in Space settings and ensure the model '{LLM_MODEL}' is accessible.\n\nThe tutor is still functional using its knowledge base for many questions — try asking about specific bioinformatics topics!" def generate(self, messages, temperature=0.7, max_tokens=1024): """Non-streaming generation. Returns complete response.""" if not self.is_available(): return self._fallback_response(messages) try: response = self.client.chat_completion( messages=messages, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stream=False, ) return response.choices[0].message.content except Exception as e: print(f"LLM generate error: {e}") return f"⚠️ LLM API error: {str(e)}\n\nThe tutor can still answer from its knowledge base. Try asking about specific concepts!" def _fallback_response(self, messages): """Knowledge-base fallback when LLM unavailable.""" user_msg = "" for m in reversed(messages): if isinstance(m, dict) and m.get("role") == "user": user_msg = m.get("content", "").lower() break if not user_msg: return "⚠️ **LLM not available.** Add HF_TOKEN in Space settings to enable AI responses.\n\nMeanwhile, the knowledge base covers: DESeq2, variant calling, microbiome diversity, scRNA-seq clustering, and more. Try asking a specific question!" response_parts = [] for term, definition in GLOSSARY.items(): if term.lower() in user_msg: response_parts.append(f"**{term}**: {definition}") if len(response_parts) >= 3: break for wf_key, wf in WORKFLOWS.items(): if any(kw in user_msg for kw in wf["name"].lower().split()): response_parts.append(f"\n### {wf['name']}") for step in wf["steps"][:3]: response_parts.append(f"**Step {step['step']}: {step['name']}**\n{step['description']}") break for misc in COMMON_MISCONCEPTIONS: if misc["domain"].replace("_", " ") in user_msg or any(w in user_msg for w in misc["misconception"].lower().split()[:5]): response_parts.append(f"\n⚠️ **Common Misconception**: {misc['misconception']}\n\n✅ **Correction**: {misc['correction']}") break if response_parts: return "📚 *Responding from knowledge base (LLM not configured):*\n\n" + "\n\n".join(response_parts) return ( "⚠️ **AI responses require HF_TOKEN.**\n\n" "To enable full AI-powered responses:\n" "1. Go to your HuggingFace account → Settings → Access Tokens\n" "2. Create a token with 'inference-api' scope\n" "3. Add it as a Secret named `HF_TOKEN` in this Space's Settings\n\n" "The knowledge base can still answer many questions. Try asking about 'RNA-seq workflow', 'variant calling', or 'microbiome diversity'!" 
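
# Usage sketch (illustrative only, not executed at import time): how a tab handler
# consumes LLMService.stream_chat with an OpenAI-style messages list. Each yield is
# the accumulated text so far, so the last value received is the complete response.
#
#   svc = LLMService()
#   messages = [
#       {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},
#       {"role": "user", "content": "What does DESeq2 model?"},
#   ]
#   for partial in svc.stream_chat(messages, temperature=0.7, max_tokens=512):
#       print(partial)  # grows with each chunk; not a per-token delta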


class RAGService:
    """Document retrieval with lazy embedding model loading."""

    _instance = None
    _initialized = False

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        if RAGService._initialized:
            return
        RAGService._initialized = True
        self.embedder = None
        self.kb_chunks = []
        self.kb_metadata = []
        self.kb_embeddings = None
        self._build_kb_index()

    def _ensure_embedder(self):
        if self.embedder is not None:
            return True
        if not HAS_ST:
            return False
        try:
            print("RAGService: Loading embedding model...")
            self.embedder = SentenceTransformer(EMBED_MODEL)
            print("RAGService: Embedding model loaded")
            if self.kb_chunks:
                self.kb_embeddings = self.embedder.encode(
                    self.kb_chunks,
                    convert_to_numpy=True,
                    normalize_embeddings=True,
                    show_progress_bar=False,
                    batch_size=32,
                )
                print(f"RAGService: KB embedded ({len(self.kb_chunks)} chunks)")
            return True
        except Exception as e:
            print(f"RAGService: Failed to load embedder: {e}")
            return False

    def _build_kb_index(self):
        chunks = []
        metadata = []
        for term, definition in GLOSSARY.items():
            chunks.append(f"{term}: {definition}")
            metadata.append({"source": "glossary", "topic": term, "type": "definition"})
        for wf_key, wf in WORKFLOWS.items():
            for step in wf["steps"]:
                step_text = f"{wf['name']} - Step {step['step']}: {step['name']}. {step['description']}"
                if step.get("tools"):
                    step_text += f" Tools: {', '.join(step['tools'])}."
                if step.get("common_mistakes"):
                    step_text += " Common mistakes: " + "; ".join(step["common_mistakes"])
                chunks.append(step_text)
                metadata.append({
                    "source": "workflow",
                    "topic": wf["domain"],
                    "type": "workflow_step",
                    "step": step["step"],
                    "workflow": wf_key
                })
        for misc in COMMON_MISCONCEPTIONS:
            text = f"Misconception: {misc['misconception']} Correction: {misc['correction']}"
            chunks.append(text)
            metadata.append({
                "source": "misconception",
                "topic": misc["domain"],
                "type": "misconception",
                "severity": misc["severity"]
            })
        for key, domain in DOMAIN_TAXONOMY.items():
            text = f"{domain['name']} covers: {', '.join(domain['subtopics'][:10])}"
            chunks.append(text)
            metadata.append({"source": "taxonomy", "topic": key, "type": "domain_overview"})
        self.kb_chunks = chunks
        self.kb_metadata = metadata
        print(f"RAGService: Built KB with {len(chunks)} chunks")

    def search(self, query, top_k=TOP_K_RETRIEVAL, user_chunks=None, user_embeddings=None):
        if not self._ensure_embedder():
            return self._keyword_search(query, top_k)
        try:
            query_embedding = self.embedder.encode(
                [query],
                convert_to_numpy=True,
                normalize_embeddings=True,
            )
            results = []
            if self.kb_embeddings is not None and len(self.kb_embeddings) > 0:
                kb_scores = np.dot(query_embedding, self.kb_embeddings.T)[0]
                top_indices = np.argsort(kb_scores)[::-1][:top_k]
                for idx in top_indices:
                    if kb_scores[idx] > 0.15:
                        results.append({
                            "text": self.kb_chunks[idx],
                            "score": float(kb_scores[idx]),
                            "metadata": self.kb_metadata[idx]
                        })
            if user_chunks and user_embeddings is not None and len(user_embeddings) > 0:
                user_scores = np.dot(query_embedding, user_embeddings.T)[0]
                top_user = np.argsort(user_scores)[::-1][:top_k]
                for idx in top_user:
                    if user_scores[idx] > 0.15:
                        results.append({
                            "text": user_chunks[idx],
                            "score": float(user_scores[idx]),
                            "metadata": {"source": "uploaded", "type": "user_content"}
                        })
            results.sort(key=lambda x: x["score"], reverse=True)
            return results[:top_k]
        except Exception as e:
            print(f"RAG search error: {e}")
            return self._keyword_search(query, top_k)
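
    # Scoring sketch: embeddings are L2-normalized at encode time, so the plain dot
    # product above is cosine similarity, and 0.15 acts as a minimum-relevance floor.
    # A standalone approximation of the same ranking (assumes the module-level
    # singleton defined below has its embedder loaded):
    #
    #   q = rag_service.embedder.encode(["differential expression"], normalize_embeddings=True)
    #   scores = np.dot(q, rag_service.kb_embeddings.T)[0]      # cosine similarities
    #   best = np.argsort(scores)[::-1][:TOP_K_RETRIEVAL]       # top-k chunk indices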

    def _keyword_search(self, query, top_k=3):
        query_words = set(query.lower().split())
        scored = []
        for i, chunk in enumerate(self.kb_chunks):
            chunk_words = set(chunk.lower().split())
            overlap = len(query_words & chunk_words)
            if overlap > 0:
                scored.append({
                    "text": chunk,
                    "score": overlap / max(len(query_words), 1),
                    "metadata": self.kb_metadata[i]
                })
        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:top_k]

    def embed_chunks(self, chunks):
        if not self._ensure_embedder() or not chunks:
            return None
        try:
            return self.embedder.encode(
                chunks,
                convert_to_numpy=True,
                normalize_embeddings=True,
                show_progress_bar=False,
                batch_size=16,
            )
        except Exception as e:
            print(f"Embed chunks error: {e}")
            return None


class DocumentParser:

    @staticmethod
    def parse_file(filepath):
        if filepath is None:
            return "", []
        filepath = str(filepath)
        ext = Path(filepath).suffix.lower()
        try:
            if ext == ".pdf" and HAS_FITZ:
                return DocumentParser._parse_pdf(filepath)
            elif ext in (".txt", ".md", ".csv", ".tsv", ".fasta", ".fa", ".fastq", ".fq", ".vcf", ".bed", ".gff", ".gtf", ".sam", ".bam"):
                return DocumentParser._parse_text(filepath)
            else:
                return f"Unsupported file type: {ext}", []
        except Exception as e:
            return f"Error parsing file: {str(e)}", []

    @staticmethod
    def _parse_pdf(filepath):
        doc = fitz.open(filepath)
        pages = []
        for page_num in range(len(doc)):
            text = doc[page_num].get_text()
            if text.strip():
                pages.append(text)
        doc.close()
        full_text = "\n\n".join(pages)
        chunks = DocumentParser._chunk_text(full_text)
        return full_text, chunks

    @staticmethod
    def _parse_text(filepath):
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()
        chunks = DocumentParser._chunk_text(text)
        return text, chunks

    @staticmethod
    def _chunk_text(text, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP):
        words = text.split()
        if len(words) <= chunk_size:
            return [text] if text.strip() else []
        chunks = []
        for i in range(0, len(words), chunk_size - overlap):
            chunk = " ".join(words[i:i + chunk_size])
            if chunk.strip():
                chunks.append(chunk)
        return chunks


llm_service = LLMService()
rag_service = RAGService()
doc_parser = DocumentParser()
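
# Chunking/embedding sketch (assumes the CHUNK_SIZE/CHUNK_OVERLAP defaults above):
# _chunk_text slides a window of CHUNK_SIZE words with a stride of CHUNK_SIZE - CHUNK_OVERLAP,
# so a 1,000-word document yields chunks starting at words 0, 340, and 680. A typical
# upload round-trip ("notes.txt" is a hypothetical file) looks like:
#
#   full_text, chunks = doc_parser.parse_file("notes.txt")
#   embeddings = rag_service.embed_chunks(chunks)          # (n_chunks, 384) for all-MiniLM-L6-v2
#   state = {"chunks": chunks, "embeddings": embeddings}   # same shape as DEFAULT_RAG_STATE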

print(f"🧬 BB Tutor initialized. LLM: {llm_service.is_available()}, Embeddings: {rag_service.embedder is not None}")


# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def _rag_context(query, user_chunks=None, user_embeddings=None):
    results = rag_service.search(query, top_k=TOP_K_RETRIEVAL, user_chunks=user_chunks, user_embeddings=user_embeddings)
    if not results:
        return ""
    parts = ["RELEVANT KNOWLEDGE BASE CONTEXT:"]
    for r in results:
        source = r["metadata"].get("source", "kb")
        parts.append(f"[{source}] {r['text'][:800]}")
    return "\n".join(parts)


def _format_history(history):
    messages = []
    for h in history:
        if isinstance(h, dict):
            messages.append(h)
        elif isinstance(h, (list, tuple)):
            if len(h) >= 1 and h[0]:
                messages.append({"role": "user", "content": str(h[0])})
            if len(h) >= 2 and h[1]:
                messages.append({"role": "assistant", "content": str(h[1])})
    return messages
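
# History-normalization sketch: _format_history accepts either Gradio history format
# and produces the same OpenAI-style messages list that the handlers below prepend
# with a system prompt and optional RAG context.
#
#   _format_history([("What is BQSR?", "Base Quality Score Recalibration adjusts ...")])
#   _format_history([{"role": "user", "content": "What is BQSR?"},
#                    {"role": "assistant", "content": "Base Quality Score Recalibration adjusts ..."}])
#   # → both return [{"role": "user", ...}, {"role": "assistant", ...}]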

# ============================================================================
# MODULE HANDLERS
# ============================================================================
def tutor_respond(message, history, system_prompt, temperature, max_tokens, rag_state):
    if not message or not message.strip():
        yield ""
        return
    rag_state = rag_state or DEFAULT_RAG_STATE
    user_chunks = rag_state.get("chunks", [])
    user_embeddings = rag_state.get("embeddings")
    rag_ctx = _rag_context(message, user_chunks, user_embeddings)
    messages = [{"role": "system", "content": system_prompt}]
    if rag_ctx:
        messages.append({"role": "system", "content": rag_ctx})
    messages.extend(_format_history(history))
    messages.append({"role": "user", "content": message})
    for partial in llm_service.stream_chat(messages, temperature, max_tokens):
        yield partial


def process_upload(file, rag_state):
    rag_state = rag_state or DEFAULT_RAG_STATE
    if file is None:
        return "📁 Please upload a file first.", "", rag_state
    full_text, chunks = doc_parser.parse_file(file)
    if not chunks:
        return "⚠️ Could not extract text from the uploaded file.", full_text[:2000] if full_text else "", rag_state
    embeddings = rag_service.embed_chunks(chunks)
    new_state = {"chunks": chunks, "embeddings": embeddings}
    preview = full_text[:2500] if len(full_text) > 2500 else full_text
    msgs = [
        {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
        {"role": "user", "content": f"Analyze and explain this bioinformatics document:\n\n{preview}"}
    ]
    explanation = llm_service.generate(msgs, temperature=0.5, max_tokens=1500)
    stats = f"📊 **Document Stats:** {len(chunks)} chunks, ~{len(full_text.split())} words | "
    stats += f"File type: {Path(str(file)).suffix} | "
    stats += "🤖 AI-powered" if llm_service.is_available() else "📚 Knowledge-base mode"
    stats += "\n\n---\n\n"
    return stats + explanation, full_text[:5000], new_state


def upload_chat_respond(message, history, rag_state):
    if not message or not message.strip():
        yield ""
        return
    rag_state = rag_state or DEFAULT_RAG_STATE
    user_chunks = rag_state.get("chunks", [])
    user_embeddings = rag_state.get("embeddings")
    if not user_chunks:
        yield "📁 Please upload a document in the panel above, then ask questions about it.\n\nYour uploaded document will be indexed and searchable across all modules!"
        return
    rag_results = rag_service.search(message, top_k=4, user_chunks=user_chunks, user_embeddings=user_embeddings)
    ctx = "CONTEXT FROM UPLOADED DOCUMENT:\n"
    if rag_results:
        for r in rag_results:
            ctx += f"\n{r['text'][:600]}\n"
    else:
        ctx += "(No highly relevant passages found — answering from general knowledge)\n"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
        {"role": "system", "content": ctx},
    ]
    messages.extend(_format_history(history))
    messages.append({"role": "user", "content": message})
    for partial in llm_service.stream_chat(messages, temperature=0.5, max_tokens=1024):
        yield partial


def generate_quiz(topic, quiz_type, num_questions, difficulty, rag_state):
    if not topic:
        return "❓ Please select or enter a topic first.", ""
    rag_results = rag_service.search(topic, top_k=3)
    context = ""
    if rag_results:
        context = "Reference material:\n" + "\n".join(r["text"][:500] for r in rag_results)
    template_key = {
        "Multiple Choice (MCQ)": "mcq",
        "True/False": "true_false",
        "Short Answer": "short_answer"
    }.get(quiz_type, "mcq")
    quiz_prompt = QUIZ_TEMPLATES[template_key].format(
        n=int(num_questions),
        topic=topic,
        difficulty=difficulty
    )
    messages = [{"role": "system", "content": SYSTEM_PROMPTS["quiz_me"]}]
    if context:
        messages.append({"role": "system", "content": context})
    messages.append({"role": "user", "content": quiz_prompt})
    response = llm_service.generate(messages, temperature=0.8, max_tokens=2000)
    formatted = f"## 🧠 {topic} Quiz — {difficulty}\n\n"
    formatted += f"*Format: {quiz_type} | Questions: {int(num_questions)}*\n\n---\n\n"
    formatted += response
    return formatted, response


def check_quiz_answers(user_answers, answer_key):
    if not user_answers or not user_answers.strip():
        return "✍️ Please enter your answers above before checking."
    if not answer_key:
        return "⚠️ Please generate a quiz first (use the panel above)."
    messages = [
        {"role": "system", "content": "You are a bioinformatics tutor grading a quiz. Compare student answers to correct answers. For each: mark ✅ or ❌, explain briefly, provide correct answer if wrong. Be encouraging. Give final score."},
        {"role": "user", "content": f"QUIZ AND ANSWERS:\n{answer_key}\n\nSTUDENT ANSWERS:\n{user_answers}\n\nGrade each:"}
    ]
    return llm_service.generate(messages, temperature=0.3, max_tokens=1500)


def generate_lesson(topic, level, include_exercises, include_quiz):
    if not topic:
        return "📚 Please select or enter a topic."
    rag_results = rag_service.search(topic, top_k=4)
    context = ""
    if rag_results:
        context = "Reference:\n" + "\n".join(r["text"][:500] for r in rag_results)
    prompt = LESSON_TEMPLATE.format(topic=topic, level=level)
    if include_exercises:
        prompt += "\n\nInclude 2-3 practical exercises with clear instructions."
    if include_quiz:
        prompt += "\n\nInclude a 5-question self-assessment quiz (with answers)."
    messages = [{"role": "system", "content": SYSTEM_PROMPTS["build_lesson"]}]
    if context:
        messages.append({"role": "system", "content": context})
    messages.append({"role": "user", "content": prompt})
    return llm_service.generate(messages, temperature=0.7, max_tokens=3000)
messages = [{"role": "system", "content": SYSTEM_PROMPTS["build_lesson"]}] if context: messages.append({"role": "system", "content": context}) messages.append({"role": "user", "content": prompt}) return llm_service.generate(messages, temperature=0.7, max_tokens=3000) def workflow_respond(message, history, selected_workflow, temperature): if not message or not message.strip(): yield "" return workflow_context = "" for wf_key, wf in WORKFLOWS.items(): if wf["name"] in selected_workflow or selected_workflow.lower() in wf["name"].lower(): workflow_context = f"WORKFLOW REFERENCE: {wf['name']}\n\n" for step in wf["steps"]: workflow_context += f"Step {step['step']}: {step['name']}\n" workflow_context += f" {step['description']}\n" if step.get("tools"): workflow_context += f" Tools: {', '.join(step['tools'])}\n" if step.get("common_mistakes"): workflow_context += f" ⚠️ Common mistakes: {'; '.join(step['common_mistakes'])}\n" workflow_context += "\n" break rag_results = rag_service.search(message, top_k=2) if rag_results: workflow_context += "\nADDITIONAL CONTEXT:\n" + "\n".join(r["text"][:500] for r in rag_results) messages = [{"role": "system", "content": SYSTEM_PROMPTS["workflow_coach"]}] if workflow_context: messages.append({"role": "system", "content": workflow_context}) messages.extend(_format_history(history)) messages.append({"role": "user", "content": message}) for partial in llm_service.stream_chat(messages, temperature, 1500): yield partial def paper_to_lesson_respond(message, history, output_format, rag_state): if not message or not message.strip(): yield "" return rag_state = rag_state or DEFAULT_RAG_STATE user_chunks = rag_state.get("chunks", []) user_embeddings = rag_state.get("embeddings") context = "" if user_chunks: rag_results = rag_service.search(message, top_k=4, user_chunks=user_chunks, user_embeddings=user_embeddings) if rag_results: context = "PAPER CONTENT:\n" + "\n".join(r["text"][:600] for r in rag_results) format_instruction = { "Lesson Plan": "Create a structured lesson plan with learning objectives, sections, and exercises.", "Slide Outline": "Create a slide-by-slide outline with key points for each slide.", "Study Notes": "Create concise study notes highlighting key methods, tools, and findings.", "Quiz Questions": "Generate 5-10 quiz questions based on the paper's methods and findings.", }.get(output_format, "Create a structured lesson plan.") messages = [{"role": "system", "content": SYSTEM_PROMPTS["paper_to_lesson"]}] if context: messages.append({"role": "system", "content": context}) messages.extend(_format_history(history)) full_msg = f"{message}\n\nOUTPUT FORMAT: {format_instruction}" messages.append({"role": "user", "content": full_msg}) for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=2500): yield partial def viva_respond(message, history, topic, difficulty): if not message or not message.strip(): yield "" return rag_results = rag_service.search(f"{topic} {message}", top_k=3) context = "" if rag_results: context = "Reference:\n" + "\n".join(r["text"][:500] for r in rag_results) messages = [ {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]}, {"role": "system", "content": f"VIVA TOPIC: {topic}\nDIFFICULTY: {difficulty}\n\n{context}"}, ] messages.extend(_format_history(history)) messages.append({"role": "user", "content": message}) for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=1000): yield partial # ============================================================================ # GRADIO APP 

# ============================================================================
# GRADIO APP ASSEMBLY
# ============================================================================
CUSTOM_CSS = """
.main-header {
    text-align: center;
    padding: 20px;
    background: linear-gradient(135deg, #1a5276 0%, #2e86c1 50%, #48c9b0 100%);
    border-radius: 12px;
    margin-bottom: 20px;
    color: white;
}
.main-header h1 { color: white; font-size: 2em; margin: 0; }
.main-header p { color: #ecf0f1; margin: 5px 0; }
.module-info {
    background: #f0f9ff;
    border-left: 4px solid #2e86c1;
    padding: 12px 16px;
    margin-bottom: 16px;
    border-radius: 0 8px 8px 0;
}
.safety-notice {
    background: #fff3e0;
    border-left: 4px solid #f39c12;
    padding: 10px 14px;
    margin-top: 10px;
    border-radius: 0 8px 8px 0;
    font-size: 0.9em;
}
.status-badge {
    display: inline-block;
    padding: 4px 12px;
    border-radius: 12px;
    font-size: 0.85em;
    font-weight: bold;
}
.status-on { background: #d4edda; color: #155724; }
.status-off { background: #f8d7da; color: #721c24; }
"""


def build_app():
    with gr.Blocks(title="Bioinformatics with BB Tutor", css=CUSTOM_CSS) as demo:
        # ── Global shared state ─────────────────────────────────────────
        rag_store = gr.State(DEFAULT_RAG_STATE)

        # ── Status indicator ────────────────────────────────────────────
        llm_status = "🟢 AI Enabled" if llm_service.is_available() else "🔴 AI Offline (Knowledge Base Active)"

        # ── Header ──────────────────────────────────────────────────────
        gr.HTML(f"""
        <div class="main-header">
            <h1>🧬 Bioinformatics with BB Tutor</h1>
            <p>AI-powered bioinformatics teaching assistant</p>
            <p>RNA-seq · Exome · Genome · Microbiome · Variants · Molecular Genetics · scRNA-seq · ATAC-seq · ChIP-seq · and more</p>
            <span class="status-badge {'status-on' if llm_service.is_available() else 'status-off'}">{llm_status}</span>
        </div>
        """)
""") with gr.Tabs(): # ══════════════════════════════════════════════════════════════ # TAB 1: ASK THE TUTOR # ══════════════════════════════════════════════════════════════ with gr.Tab("🧬 Ask the Tutor", id="ask"): gr.HTML('
💡 Ask any bioinformatics question. RAG-augmented responses from a curated knowledge base covering 15+ domains.

                # Examples must be list-of-lists matching the fn signature:
                # (message, history, system_prompt, temperature, max_tokens, rag_state)
                # Additional inputs: [system_prompt, temperature, max_tokens, rag_store]
                # → each example is the message plus 4 additional-input values
                ask_examples = [
                    ["What is the difference between DESeq2 and edgeR?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["Explain the GATK variant calling pipeline step by step.", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["What is the difference between alpha and beta diversity?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["Why should I use adjusted p-values instead of raw p-values?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["Explain the single-cell RNA-seq analysis workflow.", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["What is BQSR and why is it important?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["How do I choose between STAR and HISAT2 for alignment?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["What common mistakes do students make with DESeq2?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                ]

                gr.ChatInterface(
                    fn=tutor_respond,
                    type="messages",
                    additional_inputs=[
                        gr.Textbox(value=DEFAULT_SYSTEM_PROMPT, label="System Prompt", lines=2, visible=False),
                        gr.Slider(0.1, 1.5, 0.7, step=0.1, label="Temperature", visible=False),
                        gr.Slider(256, 4096, 1024, step=256, label="Max Tokens", visible=False),
                        rag_store,
                    ],
                    additional_inputs_accordion=gr.Accordion("⚙️ Advanced", open=False, visible=False),
                    examples=ask_examples,
                )

                gr.HTML(
                    '<div class="safety-notice">⚠️ Educational use only. Not for clinical interpretation. '
                    'Always consult qualified professionals for clinical genomics.</div>'
                )

            # ══════════════════════════════════════════════════════════════
            # TAB 2: UPLOAD & EXPLAIN
            # ══════════════════════════════════════════════════════════════
            with gr.Tab("📄 Upload & Explain", id="upload"):
                gr.HTML(
                    '<div class="module-info">📄 Upload bioinformatics documents (PDF, TXT, FASTA, VCF, etc.) '
                    'and get AI-powered analysis. Content is indexed and searchable across all modules.</div>'
                )

                with gr.Row():
                    with gr.Column(scale=1):
                        file_input = gr.File(
                            label="Upload Document",
                            file_types=[".pdf", ".txt", ".md", ".csv", ".tsv", ".fasta", ".fa", ".fastq", ".vcf", ".bed", ".gff", ".gtf", ".sam", ".bam"],
                            file_count="single",
                            type="filepath",
                        )
                        process_btn = gr.Button("🔍 Analyze Document", variant="primary")
                        gr.Markdown("**Supported:** PDF, text, FASTA/FASTQ, VCF, BED, GFF/GTF, SAM/BAM, CSV/TSV")
                    with gr.Column(scale=2):
                        explanation_output = gr.Markdown(label="Analysis & Explanation")
                        with gr.Accordion("📝 Raw Extracted Text", open=False):
                            raw_text_output = gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True)

                process_btn.click(
                    fn=process_upload,
                    inputs=[file_input, rag_store],
                    outputs=[explanation_output, raw_text_output, rag_store],
                )

                gr.Markdown("### 💬 Chat About Your Document")
                # fn signature: (message, history, rag_state) → 1 additional input
                upload_chat_examples = [
                    ["Summarize the key methods in this paper.", DEFAULT_RAG_STATE],
                    ["What bioinformatics tools are mentioned?", DEFAULT_RAG_STATE],
                    ["Explain the main findings in simple terms.", DEFAULT_RAG_STATE],
                    ["What are the limitations of this analysis?", DEFAULT_RAG_STATE],
                ]
                gr.ChatInterface(
                    fn=upload_chat_respond,
                    type="messages",
                    additional_inputs=[rag_store],
                    examples=upload_chat_examples,
                )

            # ══════════════════════════════════════════════════════════════
            # TAB 3: QUIZ ME
            # ══════════════════════════════════════════════════════════════
            with gr.Tab("❓ Quiz Me", id="quiz"):
                gr.HTML(
                    '<div class="module-info">🧠 Test your knowledge with AI-generated quizzes across all '
                    'bioinformatics domains.</div>'
                )
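
                # Quiz flow sketch: generate_quiz returns (formatted_markdown, raw_quiz_text);
                # the raw text is stashed in answer_key_state and later handed to
                # check_quiz_answers together with whatever the student typed, e.g.:
                #
                #   formatted, key = generate_quiz("RNA-seq: Differential Expression (DESeq2)",
                #                                  "Multiple Choice (MCQ)", 5, "Intermediate", None)
                #   feedback = check_quiz_answers("1: A, 2: C, 3: B, 4: D, 5: A", key)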

                with gr.Row():
                    quiz_topic = gr.Dropdown(
                        choices=TOPIC_CHOICES,
                        label="Select Topic",
                        allow_custom_value=True,
                        value="RNA-seq: Differential Expression (DESeq2)"
                    )
                    quiz_type = gr.Radio(
                        choices=["Multiple Choice (MCQ)", "True/False", "Short Answer"],
                        value="Multiple Choice (MCQ)",
                        label="Format"
                    )
                with gr.Row():
                    quiz_difficulty = gr.Radio(
                        choices=DIFFICULTY_LEVELS,
                        value="Intermediate",
                        label="Difficulty"
                    )
                    num_questions = gr.Slider(1, 10, 5, step=1, label="# Questions")

                generate_quiz_btn = gr.Button("🎲 Generate Quiz", variant="primary")
                quiz_output = gr.Markdown(label="Generated Quiz")
                answer_key_state = gr.State("")

                generate_quiz_btn.click(
                    fn=generate_quiz,
                    inputs=[quiz_topic, quiz_type, num_questions, quiz_difficulty, rag_store],
                    outputs=[quiz_output, answer_key_state],
                )

                gr.Markdown("---")
                gr.Markdown("### ✍️ Submit Your Answers")
                with gr.Row():
                    user_answers = gr.Textbox(
                        label="Your Answers (e.g., '1: A, 2: B')",
                        lines=5,
                        placeholder="Type your answers here...",
                        scale=3
                    )
                    check_btn = gr.Button("✅ Check", variant="primary", scale=1)
                feedback_output = gr.Markdown(label="Feedback")

                check_btn.click(
                    fn=check_quiz_answers,
                    inputs=[user_answers, answer_key_state],
                    outputs=[feedback_output],
                )

            # ══════════════════════════════════════════════════════════════
            # TAB 4: BUILD A LESSON
            # ══════════════════════════════════════════════════════════════
            with gr.Tab("📚 Build a Lesson", id="lesson"):
                gr.HTML(
                    '<div class="module-info">📚 Generate structured lessons with learning objectives, '
                    'explanations, exercises, and self-assessment quizzes.</div>'
                )

                with gr.Row():
                    lesson_topic = gr.Dropdown(
                        choices=TOPIC_CHOICES,
                        label="Lesson Topic",
                        allow_custom_value=True,
                        value="RNA-seq: Differential Expression (DESeq2)"
                    )
                    lesson_level = gr.Radio(
                        choices=DIFFICULTY_LEVELS,
                        value="Intermediate",
                        label="Level"
                    )
                with gr.Row():
                    include_exercises = gr.Checkbox(label="Include Exercises", value=True)
                    include_quiz = gr.Checkbox(label="Include Quiz", value=True)

                generate_lesson_btn = gr.Button("📝 Generate Lesson", variant="primary")
                lesson_output = gr.Markdown(label="Generated Lesson")

                generate_lesson_btn.click(
                    fn=generate_lesson,
                    inputs=[lesson_topic, lesson_level, include_exercises, include_quiz],
                    outputs=[lesson_output],
                )

            # ══════════════════════════════════════════════════════════════
            # TAB 5: WORKFLOW COACH
            # ══════════════════════════════════════════════════════════════
            with gr.Tab("🔬 Workflow Coach", id="workflow"):
                gr.HTML(
                    '<div class="module-info">🔬 Step-by-step guidance through bioinformatics analysis pipelines. '
                    'Select a workflow and ask about any step.</div>'
                )

                workflow_selector = gr.Dropdown(
                    choices=WORKFLOW_CHOICES,
                    label="Select Workflow",
                    value="Bulk RNA-seq: Full DE Analysis Pipeline",
                    allow_custom_value=True,
                )
                # fn signature: (message, history, selected_workflow, temperature) → 2 additional inputs
                workflow_examples = [
                    ["Walk me through the complete pipeline from raw FASTQ to DE results.", "Bulk RNA-seq: Full DE Analysis Pipeline", 0.7],
                    ["I'm at alignment. What should I check before counting?", "Bulk RNA-seq: Full DE Analysis Pipeline", 0.7],
                    ["My mapping rate is only 45%. What could be wrong?", "Bulk RNA-seq: Full DE Analysis Pipeline", 0.7],
                    ["How do I choose between STAR and HISAT2?", "Bulk RNA-seq: Full DE Analysis Pipeline", 0.7],
                    ["What parameters for GATK HaplotypeCaller on exome data?", "Exome Sequencing: Variant Calling Pipeline", 0.7],
                    ["How do I set DADA2 truncation parameters?", "Microbiome: 16S Amplicon Analysis (QIIME2)", 0.7],
                ]
                gr.ChatInterface(
                    fn=workflow_respond,
                    type="messages",
                    additional_inputs=[
                        workflow_selector,
                        gr.Slider(0.1, 1.5, 0.7, step=0.1, label="Temperature", visible=False),
                    ],
                    additional_inputs_accordion=gr.Accordion("⚙️", open=False, visible=False),
                    examples=workflow_examples,
                )

            # ══════════════════════════════════════════════════════════════
            # TAB 6: PAPER TO LESSON
            # ══════════════════════════════════════════════════════════════
            with gr.Tab("📰 Paper to Lesson", id="paper"):
                gr.HTML(
                    '<div class="module-info">📰 Convert research papers into teaching material. Upload a paper '
                    'in the Upload tab first, then generate lessons, slides, or quizzes from it.</div>'
                )

                output_format = gr.Radio(
                    choices=["Lesson Plan", "Slide Outline", "Study Notes", "Quiz Questions"],
                    value="Lesson Plan",
                    label="Output Format"
                )
                # fn signature: (message, history, output_format, rag_state) → 2 additional inputs
                paper_examples = [
                    ["Convert this paper into a 45-minute lecture plan.", "Lesson Plan", DEFAULT_RAG_STATE],
                    ["Create a slide outline covering the key methods.", "Slide Outline", DEFAULT_RAG_STATE],
                    ["Generate study notes on the bioinformatics methods.", "Study Notes", DEFAULT_RAG_STATE],
                    ["Create quiz questions on this paper's methodology.", "Quiz Questions", DEFAULT_RAG_STATE],
                ]
                gr.ChatInterface(
                    fn=paper_to_lesson_respond,
                    type="messages",
                    additional_inputs=[output_format, rag_store],
                    examples=paper_examples,
                )

            # ══════════════════════════════════════════════════════════════
            # TAB 7: VIVA PRACTICE
            # ══════════════════════════════════════════════════════════════
            with gr.Tab("🎓 Viva Practice", id="viva"):
                gr.HTML(
                    '<div class="module-info">🎓 Practice oral examinations. The AI examiner asks probing questions, '
                    'evaluates your answers, and pushes deeper understanding.</div>'
                )

                with gr.Row():
                    viva_topic = gr.Dropdown(
                        choices=TOPIC_CHOICES,
                        label="Viva Topic",
                        allow_custom_value=True,
                        value="RNA-seq: Differential Expression (DESeq2)"
                    )
                    viva_difficulty = gr.Radio(
                        choices=DIFFICULTY_LEVELS,
                        value="Intermediate",
                        label="Difficulty"
                    )
                # fn signature: (message, history, topic, difficulty) → 2 additional inputs
                viva_examples = [
                    ["I'm ready for my viva. Start with your first question.", "RNA-seq: Differential Expression (DESeq2)", "Intermediate"],
                    ["Focus on the statistical aspects of RNA-seq.", "RNA-seq: Differential Expression (DESeq2)", "Intermediate"],
                    ["Ask me about variant calling and interpretation.", "Variant Interpretation: ACMG Guidelines", "Intermediate"],
                    ["Test my understanding of microbiome diversity.", "Microbiome: Alpha & Beta Diversity", "Intermediate"],
                ]
                gr.ChatInterface(
                    fn=viva_respond,
                    type="messages",
                    additional_inputs=[viva_topic, viva_difficulty],
                    examples=viva_examples,
                )

        # ── Footer ──────────────────────────────────────────────────────
        gr.HTML("""
        <div style="text-align: center;">
            <p>Bioinformatics with BB Tutor — Educational AI Assistant</p>
            <p>⚠️ For educational purposes only. Not for clinical use.</p>
            <p>RNA-seq · Exome · Genome · Microbiome · Variants · Molecular Genetics · scRNA-seq · ATAC-seq · ChIP-seq · Methylation · Small RNA · Targeted Panels · Long-read · Spatial · Multi-omics</p>
        </div>
        """)
""") return demo if __name__ == "__main__": demo = build_app() demo.launch(server_name="0.0.0.0", server_port=7860, share=False)