Spaces:

Babajaan
/

bioinformatics-bb-tutor

Sleeping

App Files Files Community

Babajaan committed on Apr 23

Commit

6e8bd4c

verified ·

1 Parent(s): fe2b396

Add complete app.py with all 7 modules

Browse files

Files changed (1) hide show

app.py +1089 -0

app.py ADDED Viewed

	@@ -0,0 +1,1089 @@

+"""
+Bioinformatics with BB Tutor — Complete Application
+A production-oriented bioinformatics teaching assistant with 7 modules.
+"""
+import gradio as gr
+import numpy as np
+import json
+import os
+import re
+import time
+import hashlib
+from pathlib import Path
+# ── Conditional imports with fallbacks ────────────────────────────────────────
+try:
+    import fitz  # PyMuPDF
+    HAS_FITZ = True
+except ImportError:
+    HAS_FITZ = False
+try:
+    from sentence_transformers import SentenceTransformer
+    HAS_ST = True
+except ImportError:
+    HAS_ST = False
+try:
+    from huggingface_hub import InferenceClient
+    HAS_HF = True
+except ImportError:
+    HAS_HF = False
+try:
+    import pandas as pd
+    HAS_PANDAS = True
+except ImportError:
+    HAS_PANDAS = False
+# ── Import knowledge base ────────────────────────────────────────────────────
+from knowledge_base import (
+    DOMAIN_TAXONOMY, WORKFLOWS, GLOSSARY, COMMON_MISCONCEPTIONS,
+    SYSTEM_PROMPTS, QUIZ_TEMPLATES, LESSON_TEMPLATE,
+    TOPIC_CHOICES, DIFFICULTY_LEVELS, WORKFLOW_CHOICES
+)
+# ============================================================================
+# CONFIGURATION
+# ============================================================================
+# Model configuration - uses HF Inference API
+# LLM_MODEL can be overridden through the LLM_MODEL environment variable.
+LLM_MODEL = os.environ.get("LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
+# Model name handed to SentenceTransformer by RAGService.
+EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
+# API token read from the environment; LLMService only creates a client when it is set.
+HF_TOKEN = os.environ.get("HF_TOKEN", None)
+# RAG configuration
+CHUNK_SIZE = 400  # words per chunk
+CHUNK_OVERLAP = 60  # words overlap
+TOP_K_RETRIEVAL = 3  # default number of results returned by RAGService.search
+# ============================================================================
+# BACKEND SERVICES
+# ============================================================================
+class LLMService:
+    """Singleton LLM inference service using HuggingFace Inference API."""
+    def __init__(self):
+        self.client = None
+        if HAS_HF and HF_TOKEN:
+            try:
+                self.client = InferenceClient(
+                    model=LLM_MODEL,
+                    token=HF_TOKEN,
+                    timeout=120,
+                )
+            except Exception as e:
+                print(f"Warning: Could not initialize InferenceClient: {e}")
+    def is_available(self):
+        return self.client is not None
+    def stream_chat(self, messages, temperature=0.7, max_tokens=1024):
+        """Stream a chat completion. Yields partial response strings."""
+        if not self.is_available():
+            yield self._fallback_response(messages)
+            return
+        try:
+            partial = ""
+            for chunk in self.client.chat_completion(
+                messages=messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=0.9,
+                stream=True,
+            ):
+                token = chunk.choices[0].delta.content or ""
+                partial += token
+                yield partial
+        except Exception as e:
+            yield f"⚠️ LLM API error: {str(e)}\n\nPlease check that HF_TOKEN is set correctly in the Space settings and the model {LLM_MODEL} is accessible."
+    def generate(self, messages, temperature=0.7, max_tokens=1024):
+        """Non-streaming generation. Returns complete response."""
+        if not self.is_available():
+            return self._fallback_response(messages)
+        try:
+            response = self.client.chat_completion(
+                messages=messages,
+                max_tokens=max_tokens,
+                temperature=temperature,
+                top_p=0.9,
+                stream=False,
+            )
+            return response.choices[0].message.content
+        except Exception as e:
+            return f"⚠️ LLM API error: {str(e)}"
+    def _fallback_response(self, messages):
+        """Knowledge-base powered fallback when LLM is not available."""
+        user_msg = ""
+        for m in reversed(messages):
+            if m["role"] == "user":
+                user_msg = m["content"].lower()
+                break
+        # Search knowledge base for relevant content
+        response_parts = []
+        # Check glossary
+        for term, definition in GLOSSARY.items():
+            if term.lower() in user_msg or any(w in user_msg for w in term.lower().split()):
+                response_parts.append(f"**{term}**: {definition}")
+        # Check workflows
+        for wf_key, wf in WORKFLOWS.items():
+            if any(keyword in user_msg for keyword in wf["name"].lower().split()):
+                response_parts.append(f"\n### {wf['name']}\n")
+                for step in wf["steps"][:3]:
+                    response_parts.append(f"**Step {step['step']}: {step['name']}**\n{step['description']}")
+                break
+        # Check misconceptions
+        for misc in COMMON_MISCONCEPTIONS:
+            keywords = misc["misconception"].lower().split()
+            if any(w in user_msg for w in keywords if len(w) > 4):
+                response_parts.append(f"\n⚠️ **Common Misconception**: {misc['misconception']}\n\n✅ **Correction**: {misc['correction']}")
+                break
+        if response_parts:
+            return "📚 *Responding from knowledge base (LLM not configured):*\n\n" + "\n\n".join(response_parts)
+        else:
+            return (
+                "⚠️ **LLM is not configured.** To enable AI-powered responses:\n\n"
+                "1. Go to Space Settings → Repository Secrets\n"
+                "2. Add `HF_TOKEN` with your HuggingFace API token\n"
+                "3. The token needs access to inference API\n\n"
+                "Currently showing knowledge base results only. "
+                "Try asking about specific topics like 'DESeq2', 'variant calling', or 'FASTQ quality'."
+            )
+class RAGService:
+    """Document retrieval service with embedding-based search."""
+    def __init__(self):
+        self.embedder = None
+        if HAS_ST:
+            try:
+                self.embedder = SentenceTransformer(EMBED_MODEL)
+            except Exception as e:
+                print(f"Warning: Could not load embedding model: {e}")
+        # Pre-build knowledge base index
+        self.kb_chunks, self.kb_metadata = self._build_kb_index()
+        self.kb_embeddings = None
+        if self.embedder and self.kb_chunks:
+            try:
+                self.kb_embeddings = self.embedder.encode(
+                    self.kb_chunks,
+                    convert_to_numpy=True,
+                    normalize_embeddings=True,
+                    show_progress_bar=False,
+                    batch_size=32,
+                )
+            except Exception as e:
+                print(f"Warning: Could not embed knowledge base: {e}")
+    def _build_kb_index(self):
+        """Build searchable chunks from the knowledge base."""
+        chunks = []
+        metadata = []
+        # Index glossary terms
+        for term, definition in GLOSSARY.items():
+            chunks.append(f"{term}: {definition}")
+            metadata.append({"source": "glossary", "topic": term, "type": "definition"})
+        # Index workflow steps
+        for wf_key, wf in WORKFLOWS.items():
+            for step in wf["steps"]:
+                step_text = f"{wf['name']} - Step {step['step']}: {step['name']}. {step['description']}"
+                if step.get("tools"):
+                    step_text += f" Tools: {', '.join(step['tools'])}."
+                if step.get("common_mistakes"):
+                    step_text += " Common mistakes: " + "; ".join(step["common_mistakes"])
+                chunks.append(step_text)
+                metadata.append({
+                    "source": "workflow",
+                    "topic": wf["domain"],
+                    "type": "workflow_step",
+                    "step": step["step"],
+                    "workflow": wf_key
+                })
+        # Index misconceptions
+        for misc in COMMON_MISCONCEPTIONS:
+            text = f"Misconception: {misc['misconception']} Correction: {misc['correction']}"
+            chunks.append(text)
+            metadata.append({
+                "source": "misconception",
+                "topic": misc["domain"],
+                "type": "misconception",
+                "severity": misc["severity"]
+            })
+        # Index domain taxonomy
+        for key, domain in DOMAIN_TAXONOMY.items():
+            text = f"{domain['name']} covers these subtopics: {', '.join(domain['subtopics'])}."
+            chunks.append(text)
+            metadata.append({"source": "taxonomy", "topic": key, "type": "domain_overview"})
+        return chunks, metadata
+    def search(self, query, top_k=TOP_K_RETRIEVAL, user_chunks=None, user_embeddings=None):
+        """Search the knowledge base and optional user-uploaded content."""
+        if not self.embedder:
+            return self._keyword_search(query, top_k)
+        try:
+            query_embedding = self.embedder.encode(
+                [query],
+                convert_to_numpy=True,
+                normalize_embeddings=True,
+            )
+            results = []
+            # Search knowledge base
+            if self.kb_embeddings is not None and len(self.kb_embeddings) > 0:
+                kb_scores = np.dot(query_embedding, self.kb_embeddings.T)[0]
+                top_indices = np.argsort(kb_scores)[::-1][:top_k]
+                for idx in top_indices:
+                    if kb_scores[idx] > 0.2:  # minimum relevance threshold
+                        results.append({
+                            "text": self.kb_chunks[idx],
+                            "score": float(kb_scores[idx]),
+                            "metadata": self.kb_metadata[idx]
+                        })
+            # Search user-uploaded content
+            if user_chunks and user_embeddings is not None and len(user_embeddings) > 0:
+                user_scores = np.dot(query_embedding, user_embeddings.T)[0]
+                top_user = np.argsort(user_scores)[::-1][:top_k]
+                for idx in top_user:
+                    if user_scores[idx] > 0.2:
+                        results.append({
+                            "text": user_chunks[idx],
+                            "score": float(user_scores[idx]),
+                            "metadata": {"source": "uploaded_document", "type": "user_content"}
+                        })
+            # Sort by score and return top_k
+            results.sort(key=lambda x: x["score"], reverse=True)
+            return results[:top_k]
+        except Exception as e:
+            print(f"Embedding search error: {e}")
+            return self._keyword_search(query, top_k)
+    def _keyword_search(self, query, top_k=3):
+        """Fallback keyword-based search."""
+        query_words = set(query.lower().split())
+        scored = []
+        for i, chunk in enumerate(self.kb_chunks):
+            chunk_words = set(chunk.lower().split())
+            overlap = len(query_words & chunk_words)
+            if overlap > 0:
+                scored.append({
+                    "text": chunk,
+                    "score": overlap / max(len(query_words), 1),
+                    "metadata": self.kb_metadata[i]
+                })
+        scored.sort(key=lambda x: x["score"], reverse=True)
+        return scored[:top_k]
+    def embed_chunks(self, chunks):
+        """Embed a list of text chunks. Returns numpy array or None."""
+        if not self.embedder or not chunks:
+            return None
+        try:
+            return self.embedder.encode(
+                chunks,
+                convert_to_numpy=True,
+                normalize_embeddings=True,
+                show_progress_bar=False,
+                batch_size=32,
+            )
+        except Exception:
+            return None
+class DocumentParser:
+    """Parse uploaded documents into text chunks."""
+    @staticmethod
+    def parse_file(filepath):
+        """Extract text from uploaded file."""
+        if filepath is None:
+            return "", []
+        filepath = str(filepath)
+        ext = Path(filepath).suffix.lower()
+        try:
+            if ext == ".pdf" and HAS_FITZ:
+                return DocumentParser._parse_pdf(filepath)
+            elif ext in (".txt", ".md", ".csv", ".tsv", ".fasta", ".fa", ".fastq", ".fq", ".vcf", ".bed", ".gff", ".gtf", ".sam"):
+                return DocumentParser._parse_text(filepath)
+            else:
+                return f"Unsupported file type: {ext}", []
+        except Exception as e:
+            return f"Error parsing file: {str(e)}", []
+    @staticmethod
+    def _parse_pdf(filepath):
+        doc = fitz.open(filepath)
+        pages = []
+        for page_num in range(len(doc)):
+            page = doc[page_num]
+            text = page.get_text()
+            if text.strip():
+                pages.append(text)
+        doc.close()
+        full_text = "\n\n".join(pages)
+        chunks = DocumentParser._chunk_text(full_text)
+        return full_text, chunks
+    @staticmethod
+    def _parse_text(filepath):
+        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
+            text = f.read()
+        chunks = DocumentParser._chunk_text(text)
+        return text, chunks
+    @staticmethod
+    def _chunk_text(text, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP):
+        words = text.split()
+        if len(words) <= chunk_size:
+            return [text] if text.strip() else []
+        chunks = []
+        for i in range(0, len(words), chunk_size - overlap):
+            chunk = " ".join(words[i:i + chunk_size])
+            if chunk.strip():
+                chunks.append(chunk)
+        return chunks
+# ============================================================================
+# INITIALIZE SERVICES
+# ============================================================================
+# Services are created once at import time and used by the module handlers below.
+print("🧬 Initializing BB Tutor services...")
+llm_service = LLMService()
+# Constructing RAGService builds the knowledge-base index and, when an
+# embedder could be loaded, embeds it.
+rag_service = RAGService()
+# DocumentParser only defines static methods; the instance is used purely as a namespace.
+doc_parser = DocumentParser()
+print(f"   LLM available: {llm_service.is_available()}")
+print(f"   RAG embedder available: {rag_service.embedder is not None}")
+print(f"   Knowledge base chunks: {len(rag_service.kb_chunks)}")
+print("✅ BB Tutor services initialized!")
+# ============================================================================
+# MODULE 1: ASK THE TUTOR
+# ============================================================================
+def tutor_respond(message, history, system_prompt, temperature, max_tokens, rag_store):
+    """Main tutor chat handler with RAG-augmented responses."""
+    if not message.strip():
+        yield ""
+        return
+    # Retrieve relevant context
+    user_chunks = rag_store.get("chunks", []) if isinstance(rag_store, dict) else []
+    user_embeddings = rag_store.get("embeddings") if isinstance(rag_store, dict) else None
+    rag_results = rag_service.search(
+        message,
+        top_k=TOP_K_RETRIEVAL,
+        user_chunks=user_chunks,
+        user_embeddings=user_embeddings
+    )
+    # Build context from retrieved chunks
+    context_parts = []
+    if rag_results:
+        context_parts.append("RELEVANT KNOWLEDGE BASE CONTEXT:")
+        for r in rag_results:
+            source = r["metadata"].get("source", "unknown")
+            context_parts.append(f"[Source: {source}] {r['text']}")
+    # Build messages
+    messages = [{"role": "system", "content": system_prompt}]
+    if context_parts:
+        messages.append({
+            "role": "system",
+            "content": "\n".join(context_parts)
+        })
+    # Add conversation history
+    for h in history:
+        messages.append(h)
+    messages.append({"role": "user", "content": message})
+    # Stream response
+    for partial in llm_service.stream_chat(messages, temperature=temperature, max_tokens=max_tokens):
+        yield partial
+# ============================================================================
+# MODULE 2: UPLOAD AND EXPLAIN
+# ============================================================================
+def process_upload(file, rag_store):
+    """Process an uploaded file: extract text, chunk, embed, explain."""
+    if file is None:
+        return "Please upload a file first.", "", rag_store
+    full_text, chunks = doc_parser.parse_file(file)
+    if not chunks:
+        return "Could not extract text from the uploaded file.", full_text[:2000] if full_text else "", rag_store
+    # Embed the chunks
+    embeddings = rag_service.embed_chunks(chunks)
+    # Update RAG store with uploaded content
+    new_store = dict(rag_store) if isinstance(rag_store, dict) else {"chunks": [], "embeddings": None}
+    new_store["chunks"] = chunks
+    if embeddings is not None:
+        new_store["embeddings"] = embeddings
+    # Generate explanation
+    preview = full_text[:3000] if len(full_text) > 3000 else full_text
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
+        {"role": "user", "content": f"Please analyze and explain this uploaded content:\n\n{preview}"}
+    ]
+    explanation = llm_service.generate(messages, temperature=0.5, max_tokens=1500)
+    # Add stats
+    stats = f"📊 **Document Stats:** {len(chunks)} chunks, ~{len(full_text.split())} words extracted\n\n---\n\n"
+    return stats + explanation, full_text[:5000], new_store
+def upload_chat_respond(message, history, rag_store):
+    """Chat about uploaded documents with RAG context."""
+    if not message.strip():
+        yield ""
+        return
+    user_chunks = rag_store.get("chunks", []) if isinstance(rag_store, dict) else []
+    user_embeddings = rag_store.get("embeddings") if isinstance(rag_store, dict) else None
+    if not user_chunks:
+        yield "Please upload a document first using the upload panel above, then ask questions about it."
+        return
+    # Retrieve relevant chunks from uploaded doc
+    rag_results = rag_service.search(
+        message, top_k=4,
+        user_chunks=user_chunks,
+        user_embeddings=user_embeddings
+    )
+    context = "CONTEXT FROM UPLOADED DOCUMENT:\n"
+    for r in rag_results:
+        context += f"\n{r['text']}\n"
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
+        {"role": "system", "content": context},
+    ]
+    for h in history:
+        messages.append(h)
+    messages.append({"role": "user", "content": message})
+    for partial in llm_service.stream_chat(messages, temperature=0.5, max_tokens=1024):
+        yield partial
+# ============================================================================
+# MODULE 3: QUIZ ME
+# ============================================================================
+def generate_quiz(topic, quiz_type, num_questions, difficulty, rag_store):
+    """Generate a quiz on a bioinformatics topic."""
+    if not topic:
+        return "Please select or enter a topic first.", ""
+    # Get relevant context
+    rag_results = rag_service.search(topic, top_k=3)
+    context = ""
+    if rag_results:
+        context = "Use this reference material:\n" + "\n".join(r["text"] for r in rag_results)
+    template_key = {
+        "Multiple Choice (MCQ)": "mcq",
+        "True/False": "true_false",
+        "Short Answer": "short_answer"
+    }.get(quiz_type, "mcq")
+    quiz_prompt = QUIZ_TEMPLATES[template_key].format(
+        n=int(num_questions),
+        topic=topic,
+        difficulty=difficulty
+    )
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPTS["quiz_me"]},
+    ]
+    if context:
+        messages.append({"role": "system", "content": context})
+    messages.append({"role": "user", "content": quiz_prompt})
+    response = llm_service.generate(messages, temperature=0.8, max_tokens=2000)
+    # Format nicely
+    formatted = f"## 🧠 {topic} Quiz — {difficulty}\n\n"
+    formatted += f"*Type: {quiz_type} | Questions: {int(num_questions)}*\n\n---\n\n"
+    formatted += response
+    # Store answer key
+    answer_key = response
+    return formatted, answer_key
+def check_quiz_answers(user_answers, answer_key):
+    """Provide feedback on quiz answers."""
+    if not user_answers.strip():
+        return "Please enter your answers first."
+    if not answer_key:
+        return "Please generate a quiz first."
+    messages = [
+        {"role": "system", "content": "You are a bioinformatics tutor grading a quiz. Compare the student's answers to the correct answers. For each answer: mark it ✅ correct or ❌ incorrect, explain why, and provide the correct answer if wrong. Be encouraging but accurate. Give a final score."},
+        {"role": "user", "content": f"QUIZ AND ANSWER KEY:\n{answer_key}\n\nSTUDENT'S ANSWERS:\n{user_answers}\n\nPlease grade each answer:"}
+    ]
+    return llm_service.generate(messages, temperature=0.3, max_tokens=1500)
+# ============================================================================
+# MODULE 4: BUILD A LESSON
+# ============================================================================
+def generate_lesson(topic, level, include_exercises, include_quiz):
+    """Generate a structured lesson on a bioinformatics topic."""
+    if not topic:
+        return "Please select or enter a topic."
+    # Get relevant context
+    rag_results = rag_service.search(topic, top_k=4)
+    context = ""
+    if rag_results:
+        context = "Reference material:\n" + "\n".join(r["text"] for r in rag_results)
+    prompt = LESSON_TEMPLATE.format(topic=topic, level=level)
+    if include_exercises:
+        prompt += "\n\nInclude 2-3 practical exercises with clear instructions."
+    if include_quiz:
+        prompt += "\n\nInclude a 5-question self-assessment quiz at the end (with answers)."
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPTS["build_lesson"]},
+    ]
+    if context:
+        messages.append({"role": "system", "content": context})
+    messages.append({"role": "user", "content": prompt})
+    return llm_service.generate(messages, temperature=0.7, max_tokens=3000)
+# ============================================================================
+# MODULE 5: WORKFLOW COACH
+# ============================================================================
+def workflow_respond(message, history, selected_workflow, temperature):
+    """Workflow coaching chat handler."""
+    if not message.strip():
+        yield ""
+        return
+    # Get workflow context
+    workflow_context = ""
+    for wf_key, wf in WORKFLOWS.items():
+        if wf["name"] in selected_workflow or selected_workflow.lower() in wf["name"].lower():
+            workflow_context = f"WORKFLOW REFERENCE: {wf['name']}\n\n"
+            for step in wf["steps"]:
+                workflow_context += f"Step {step['step']}: {step['name']}\n"
+                workflow_context += f"  Description: {step['description']}\n"
+                workflow_context += f"  Tools: {', '.join(step.get('tools', []))}\n"
+                if step.get("common_mistakes"):
+                    workflow_context += f"  Common mistakes: {'; '.join(step['common_mistakes'])}\n"
+                workflow_context += "\n"
+            break
+    # Also search RAG
+    rag_results = rag_service.search(message, top_k=2)
+    if rag_results:
+        workflow_context += "\nADDITIONAL CONTEXT:\n" + "\n".join(r["text"] for r in rag_results)
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPTS["workflow_coach"]},
+    ]
+    if workflow_context:
+        messages.append({"role": "system", "content": workflow_context})
+    for h in history:
+        messages.append(h)
+    messages.append({"role": "user", "content": message})
+    for partial in llm_service.stream_chat(messages, temperature=temperature, max_tokens=1500):
+        yield partial
+# ============================================================================
+# MODULE 6: PAPER TO LESSON
+# ============================================================================
+def paper_to_lesson_respond(message, history, output_format, rag_store):
+    """Convert paper content into teaching material."""
+    if not message.strip():
+        yield ""
+        return
+    user_chunks = rag_store.get("chunks", []) if isinstance(rag_store, dict) else []
+    user_embeddings = rag_store.get("embeddings") if isinstance(rag_store, dict) else None
+    context = ""
+    if user_chunks:
+        rag_results = rag_service.search(
+            message, top_k=4,
+            user_chunks=user_chunks,
+            user_embeddings=user_embeddings
+        )
+        if rag_results:
+            context = "PAPER CONTENT:\n" + "\n".join(r["text"] for r in rag_results)
+    format_instruction = {
+        "Lesson Plan": "Create a structured lesson plan with learning objectives, sections, and exercises.",
+        "Slide Outline": "Create a slide-by-slide outline with key points for each slide (title + 3-5 bullet points per slide).",
+        "Study Notes": "Create concise study notes highlighting key methods, tools, and findings.",
+        "Quiz Questions": "Generate 5-10 quiz questions based on the paper's methods and findings.",
+    }.get(output_format, "Create a structured lesson plan.")
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPTS["paper_to_lesson"]},
+    ]
+    if context:
+        messages.append({"role": "system", "content": context})
+    for h in history:
+        messages.append(h)
+    full_message = f"{message}\n\nOUTPUT FORMAT: {format_instruction}"
+    messages.append({"role": "user", "content": full_message})
+    for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=2500):
+        yield partial
+# ============================================================================
+# MODULE 7: VIVA PRACTICE
+# ============================================================================
+def viva_respond(message, history, topic, difficulty):
+    """Viva voce practice session handler."""
+    if not message.strip():
+        yield ""
+        return
+    # Get topic context
+    rag_results = rag_service.search(f"{topic} {message}", top_k=3)
+    context = ""
+    if rag_results:
+        context = "REFERENCE MATERIAL:\n" + "\n".join(r["text"] for r in rag_results)
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]},
+        {"role": "system", "content": f"VIVA TOPIC: {topic}\nDIFFICULTY LEVEL: {difficulty}\n\n{context}"},
+    ]
+    for h in history:
+        messages.append(h)
+    messages.append({"role": "user", "content": message})
+    for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=1000):
+        yield partial
+def start_viva(topic, difficulty):
+    """Generate the opening viva question."""
+    if not topic:
+        return "Please select a topic to begin the viva."
+    rag_results = rag_service.search(topic, top_k=2)
+    context = ""
+    if rag_results:
+        context = "\n".join(r["text"] for r in rag_results)
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]},
+        {"role": "system", "content": f"Topic: {topic}\nDifficulty: {difficulty}\n\nReference: {context}"},
+        {"role": "user", "content": f"I'm ready for my viva on {topic}. Please start with your first question."}
+    ]
+    return llm_service.generate(messages, temperature=0.7, max_tokens=500)
+# ============================================================================
+# GRADIO APP ASSEMBLY
+# ============================================================================
+# Custom CSS
+# Styles the three classes referenced by the gr.HTML snippets in build_app:
+#   .main-header   - gradient banner with the app title
+#   .module-info   - blue-bordered info box at the top of a tab
+#   .safety-notice - orange-bordered disclaimer box
+CUSTOM_CSS = """
+.main-header {
+    text-align: center;
+    padding: 20px;
+    background: linear-gradient(135deg, #1a5276 0%, #2e86c1 50%, #48c9b0 100%);
+    border-radius: 12px;
+    margin-bottom: 20px;
+    color: white;
+}
+.main-header h1 { color: white; font-size: 2em; margin-bottom: 5px; }
+.main-header p { color: #ecf0f1; font-size: 1.1em; }
+.module-info {
+    background: #f0f9ff;
+    border-left: 4px solid #2e86c1;
+    padding: 12px 16px;
+    margin-bottom: 16px;
+    border-radius: 0 8px 8px 0;
+}
+.safety-notice {
+    background: #fff3e0;
+    border-left: 4px solid #f39c12;
+    padding: 10px 14px;
+    margin-top: 10px;
+    border-radius: 0 8px 8px 0;
+    font-size: 0.9em;
+}
+"""
+def build_app():
+    with gr.Blocks(title="Bioinformatics with BB Tutor") as demo:
+        # Shared state across all tabs
+        rag_store = gr.State({"chunks": [], "embeddings": None})
+        # ── Header ────────────────────────────────────────────────────────
+        gr.HTML("""
+        <div class="main-header">
+            <h1>🧬 Bioinformatics with BB Tutor</h1>
+            <p>Your AI-powered bioinformatics teaching assistant</p>
+            <p style="font-size: 0.85em; opacity: 0.9;">
+                RNA-seq · Exome · Genome · Microbiome · Variants · Molecular Genetics · scRNA-seq · ATAC-seq · ChIP-seq · and more
+            </p>
+        </div>
+        """)
+        with gr.Tabs():
+            # ══════════════════════════════════════════════════════════════
+            # TAB 1: ASK THE TUTOR
+            # ══════════════════════════════════════════════════════════════
+            with gr.Tab("🧬 Ask the Tutor", id="ask"):
+                gr.HTML('<div class="module-info">💡 Ask any bioinformatics question. The tutor uses a curated knowledge base to provide accurate, educational answers with proper context.</div>')
+                gr.ChatInterface(
+                    fn=tutor_respond,
+                    type="messages",
+                    additional_inputs=[
+                        gr.Textbox(
+                            value=SYSTEM_PROMPTS["ask_tutor"],
+                            label="System Prompt",
+                            lines=3,
+                            visible=True,
+                        ),
+                        gr.Slider(
+                            minimum=0.1, maximum=1.5, value=0.7, step=0.1,
+                            label="Temperature (lower = more focused, higher = more creative)"
+                        ),
+                        gr.Slider(
+                            minimum=256, maximum=4096, value=1024, step=256,
+                            label="Max Response Length (tokens)"
+                        ),
+                        rag_store,
+                    ],
+                    additional_inputs_accordion=gr.Accordion("⚙️ Advanced Settings", open=False),
+                    examples=[
+                        "What is the difference between DESeq2 and edgeR for differential expression analysis?",
+                        "Explain the GATK Best Practices variant calling pipeline step by step.",
+                        "What is the difference between alpha and beta diversity in microbiome analysis?",
+                        "Why should I use adjusted p-values instead of raw p-values?",
+                        "Explain the single-cell RNA-seq analysis workflow from raw data to cell type annotation.",
+                        "What is BQSR and why is it important in variant calling?",
+                    ],
+                    save_history=True,
+                )
+                gr.HTML('<div class="safety-notice">⚠️ <strong>Educational use only.</strong> This tutor provides learning support, not clinical interpretations. Always consult qualified professionals for clinical genomics decisions.</div>')
+            # ══════════════════════════════════════════════════════════════
+            # TAB 2: UPLOAD AND EXPLAIN
+            # ══════════════════════════════════════════════════════════════
+            with gr.Tab("📄 Upload & Explain", id="upload"):
+                gr.HTML('<div class="module-info">📄 Upload bioinformatics documents (PDFs, text files, VCFs, FASTA, etc.) and get AI-powered explanations. Uploaded content becomes available for Q&A across all modules.</div>')
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        file_input = gr.File(
+                            label="Upload Document",
+                            file_types=[".pdf", ".txt", ".md", ".csv", ".tsv",
+                                       ".fasta", ".fa", ".fastq", ".vcf", ".bed",
+                                       ".gff", ".gtf", ".sam"],
+                            file_count="single",
+                            type="filepath",
+                        )
+                        process_btn = gr.Button("🔍 Analyze Document", variant="primary", size="lg")
+                    with gr.Column(scale=2):
+                        explanation_output = gr.Markdown(label="Analysis & Explanation")
+                with gr.Accordion("📝 Raw Extracted Text", open=False):
+                    raw_text_output = gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True)
+                process_btn.click(
+                    fn=process_upload,
+                    inputs=[file_input, rag_store],
+                    outputs=[explanation_output, raw_text_output, rag_store],
+                )
+                gr.Markdown("### 💬 Ask Questions About Your Document")
+                gr.ChatInterface(
+                    fn=upload_chat_respond,
+                    type="messages",
+                    additional_inputs=[rag_store],
+                    additional_inputs_accordion=gr.Accordion("", open=False, visible=False),
+                    examples=[
+                        "Summarize the key methods used in this paper.",
+                        "What bioinformatics tools are mentioned?",
+                        "Explain the main findings in simple terms.",
+                        "What are the limitations of this analysis?",
+                    ],
+                )
+            # ══════════════════════════════════════════════════════════════
+            # TAB 3: QUIZ ME
+            # ══════════════════════════════════════════════════════════════
+            with gr.Tab("❓ Quiz Me", id="quiz"):
+                gr.HTML('<div class="module-info">🧠 Test your knowledge with auto-generated quizzes. Choose a topic, format, and difficulty level.</div>')
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        quiz_topic = gr.Dropdown(
+                            choices=TOPIC_CHOICES,
+                            label="Select Topic",
+                            allow_custom_value=True,
+                            value="RNA-seq: Differential Expression (DESeq2)"
+                        )
+                    with gr.Column(scale=1):
+                        quiz_type = gr.Radio(
+                            choices=["Multiple Choice (MCQ)", "True/False", "Short Answer"],
+                            value="Multiple Choice (MCQ)",
+                            label="Question Format"
+                        )
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        quiz_difficulty = gr.Radio(
+                            choices=DIFFICULTY_LEVELS,
+                            value="Intermediate",
+                            label="Difficulty"
+                        )
+                    with gr.Column(scale=1):
+                        num_questions = gr.Slider(
+                            minimum=1, maximum=10, value=5, step=1,
+                            label="Number of Questions"
+                        )
+                    with gr.Column(scale=1):
+                        generate_quiz_btn = gr.Button("🎲 Generate Quiz", variant="primary", size="lg")
+                quiz_output = gr.Markdown(label="Generated Quiz")
+                answer_key_state = gr.State("")
+                generate_quiz_btn.click(
+                    fn=generate_quiz,
+                    inputs=[quiz_topic, quiz_type, num_questions, quiz_difficulty, rag_store],
+                    outputs=[quiz_output, answer_key_state],
+                )
+                gr.Markdown("---")
+                gr.Markdown("### ✍️ Submit Your Answers")
+                user_answers = gr.Textbox(
+                    label="Enter your answers (e.g., '1: A, 2: B, 3: True...')",
+                    lines=5,
+                    placeholder="Type your answers here..."
+                )
+                check_btn = gr.Button("✅ Check Answers", variant="primary")
+                feedback_output = gr.Markdown(label="Feedback")
+                check_btn.click(
+                    fn=check_quiz_answers,
+                    inputs=[user_answers, answer_key_state],
+                    outputs=[feedback_output],
+                )
+            # ══════════════════════════════════════════════════════════════
+            # TAB 4: BUILD A LESSON
+            # ══════════════════════════════════════════════════════════════
+            with gr.Tab("📚 Build a Lesson", id="lesson"):
+                gr.HTML('<div class="module-info">📚 Generate structured lessons with learning objectives, explanations, exercises, and quizzes for any bioinformatics topic.</div>')
+                with gr.Row():
+                    with gr.Column(scale=2):
+                        lesson_topic = gr.Dropdown(
+                            choices=TOPIC_CHOICES,
+                            label="Lesson Topic",
+                            allow_custom_value=True,
+                            value="RNA-seq: Differential Expression (DESeq2)"
+                        )
+                    with gr.Column(scale=1):
+                        lesson_level = gr.Radio(
+                            choices=DIFFICULTY_LEVELS,
+                            value="Intermediate",
+                            label="Student Level"
+                        )
+                with gr.Row():
+                    include_exercises = gr.Checkbox(label="Include Practical Exercises", value=True)
+                    include_quiz = gr.Checkbox(label="Include Self-Assessment Quiz", value=True)
+                    generate_lesson_btn = gr.Button("📝 Generate Lesson", variant="primary", size="lg")
+                lesson_output = gr.Markdown(label="Generated Lesson")
+                generate_lesson_btn.click(
+                    fn=generate_lesson,
+                    inputs=[lesson_topic, lesson_level, include_exercises, include_quiz],
+                    outputs=[lesson_output],
+                )
+            # ══════════════════════════════════════════════════════════════
+            # TAB 5: WORKFLOW COACH
+            # ══════════════════════════════════════════════════════════════
+            with gr.Tab("🔬 Workflow Coach", id="workflow"):
+                gr.HTML('<div class="module-info">🔬 Get step-by-step guidance through bioinformatics analysis pipelines. Select a workflow and ask questions about any step.</div>')
+                workflow_selector = gr.Dropdown(
+                    choices=WORKFLOW_CHOICES,
+                    label="Select Workflow",
+                    value="Bulk RNA-seq: Full DE Analysis Pipeline",
+                    allow_custom_value=True,
+                )
+                gr.ChatInterface(
+                    fn=workflow_respond,
+                    type="messages",
+                    additional_inputs=[
+                        workflow_selector,
+                        gr.Slider(
+                            minimum=0.1, maximum=1.5, value=0.7, step=0.1,
+                            label="Temperature"
+                        ),
+                    ],
+                    additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=False),
+                    examples=[
+                        "Walk me through the complete pipeline from raw FASTQ to differential expression results.",
+                        "I'm at the alignment step. What should I check before moving to counting?",
+                        "My mapping rate is only 45%. What could be wrong?",
+                        "How do I choose between STAR and HISAT2 for RNA-seq alignment?",
+                        "What parameters should I use for GATK HaplotypeCaller on exome data?",
+                        "How do I set the truncation parameters for DADA2 in QIIME2?",
+                    ],
+                )
+            # ══════════════════════════════════════════════════════════════
+            # TAB 6: PAPER TO LESSON
+            # ══════════════════════════════════════════════════════════════
+            with gr.Tab("📰 Paper to Lesson", id="paper"):
+                gr.HTML('<div class="module-info">📰 Convert research papers into teaching material. Upload a paper first in the "Upload & Explain" tab, then use this module to generate lessons, slide outlines, and quiz questions from it.</div>')
+                output_format = gr.Radio(
+                    choices=["Lesson Plan", "Slide Outline", "Study Notes", "Quiz Questions"],
+                    value="Lesson Plan",
+                    label="Output Format"
+                )
+                gr.ChatInterface(
+                    fn=paper_to_lesson_respond,
+                    type="messages",
+                    additional_inputs=[
+                        output_format,
+                        rag_store,
+                    ],
+                    additional_inputs_accordion=gr.Accordion("", open=False, visible=False),
+                    examples=[
+                        "Convert this paper into a 45-minute lecture plan.",
+                        "Create a slide outline covering the key methods in this paper.",
+                        "Generate study notes highlighting the bioinformatics methods used.",
+                        "Create quiz questions testing understanding of this paper's methodology.",
+                    ],
+                )
+            # ══════════════════════════════════════════════════════════════
+            # TAB 7: VIVA PRACTICE
+            # ══════════════════════════════════════════════════════════════
+            with gr.Tab("🎓 Viva Practice", id="viva"):
+                gr.HTML('<div class="module-info">🎓 Practice for oral examinations. The AI examiner asks probing questions, evaluates your answers, and pushes you to demonstrate deeper understanding.</div>')
+                with gr.Row():
+                    viva_topic = gr.Dropdown(
+                        choices=TOPIC_CHOICES,
+                        label="Viva Topic",
+                        allow_custom_value=True,
+                        value="RNA-seq: Differential Expression (DESeq2)"
+                    )
+                    viva_difficulty = gr.Radio(
+                        choices=DIFFICULTY_LEVELS,
+                        value="Intermediate",
+                        label="Exam Difficulty"
+                    )
+                gr.ChatInterface(
+                    fn=viva_respond,
+                    type="messages",
+                    additional_inputs=[
+                        viva_topic,
+                        viva_difficulty,
+                    ],
+                    additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=False),
+                    examples=[
+                        "I'm ready for my viva. Please start with your first question.",
+                        "Can we focus on the statistical aspects of RNA-seq analysis?",
+                        "Ask me about variant calling and interpretation.",
+                        "Test my understanding of microbiome diversity analysis.",
+                    ],
+                )
+        # ── Footer ────────────────────────────────────────────────────────
+        gr.HTML("""
+        <div style="text-align: center; padding: 20px; margin-top: 20px; border-top: 1px solid #e0e0e0; color: #666; font-size: 0.85em;">
+            <p><strong>Bioinformatics with BB Tutor</strong> — Educational AI Assistant</p>
+            <p>⚠️ For educational purposes only. Not for clinical use. Always verify critical information with primary sources.</p>
+            <p>Domains: RNA-seq · Exome · Genome · Microbiome · Variants · Molecular Genetics · scRNA-seq · ATAC-seq · ChIP-seq · Methylation · Small RNA · Targeted Panels · Long-read · Spatial Transcriptomics · Multi-omics</p>
+        </div>
+        """)
+    return demo
+# ============================================================================
+# LAUNCH
+# ============================================================================
+if __name__ == "__main__":
+    # Script entry point: the UI is built and served only when this file is
+    # executed directly, not when it is imported as a module.
+    demo = build_app()
+    demo.launch(
+        server_name="0.0.0.0",  # listen on all network interfaces rather than localhost only
+        server_port=7860,  # fixed port; presumably the one the hosting Space expects (TODO confirm)
+        share=False,  # do not request a public Gradio share link
+    )