Babajaan committed
Commit 855ef94 · verified · 1 Parent(s): f59aca4

Fix app.py: lazy loading, robust error handling, proper State usage
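The three fixes share one pattern; a minimal sketch for reviewers follows. This is an illustration of the gr.State handling the commit adopts, not an excerpt of app.py: the single-output wiring and placeholder chunk list are simplified from the real process_upload, which returns three values.

    import gradio as gr

    # "Proper State usage" as adopted in this commit: gr.State holds a plain
    # Python value; handlers receive the value (never the component), guard
    # against None, and return a NEW dict rather than mutating in place.
    def process_upload(file, rag_state):
        rag_state = rag_state or {"chunks": [], "embeddings": None}
        if file is None:
            return rag_state                    # nothing uploaded; state unchanged
        chunks = ["..."]                        # placeholder for parsed chunks
        return {"chunks": chunks, "embeddings": None}  # fresh value updates the State

    with gr.Blocks() as demo:
        rag_store = gr.State({"chunks": [], "embeddings": None})
        file_in = gr.File()
        gr.Button("Analyze").click(process_upload, inputs=[file_in, rag_store], outputs=[rag_store])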

Files changed (1)
  1. app.py +394 -397
app.py CHANGED
@@ -1,6 +1,13 @@
  """
  Bioinformatics with BB Tutor — Complete Application
- A production-oriented bioinformatics teaching assistant with 7 modules.
  """

  import gradio as gr
@@ -9,7 +16,6 @@ import json
  import os
  import re
  import time
- import hashlib
  from pathlib import Path

  # ── Conditional imports with fallbacks ────────────────────────────────────────
@@ -18,24 +24,21 @@ try:
      HAS_FITZ = True
  except ImportError:
      HAS_FITZ = False

  try:
      from sentence_transformers import SentenceTransformer
      HAS_ST = True
  except ImportError:
      HAS_ST = False

  try:
      from huggingface_hub import InferenceClient
      HAS_HF = True
  except ImportError:
      HAS_HF = False
-
- try:
-     import pandas as pd
-     HAS_PANDAS = True
- except ImportError:
-     HAS_PANDAS = False

  # ── Import knowledge base ────────────────────────────────────────────────────
  from knowledge_base import (
@@ -49,41 +52,78 @@ from knowledge_base import (
  # CONFIGURATION
  # ============================================================================

- # Model configuration - uses HF Inference API
  LLM_MODEL = os.environ.get("LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
  EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
  HF_TOKEN = os.environ.get("HF_TOKEN", None)

- # RAG configuration
- CHUNK_SIZE = 400  # words per chunk
- CHUNK_OVERLAP = 60  # words overlap
  TOP_K_RETRIEVAL = 3


  # ============================================================================
- # BACKEND SERVICES
  # ============================================================================

  class LLMService:
-     """Singleton LLM inference service using HuggingFace Inference API."""

      def __init__(self):
          self.client = None
-         if HAS_HF and HF_TOKEN:
-             try:
-                 self.client = InferenceClient(
-                     model=LLM_MODEL,
-                     token=HF_TOKEN,
-                     timeout=120,
-                 )
-             except Exception as e:
-                 print(f"Warning: Could not initialize InferenceClient: {e}")

      def is_available(self):
          return self.client is not None

      def stream_chat(self, messages, temperature=0.7, max_tokens=1024):
-         """Stream a chat completion. Yields partial response strings."""
          if not self.is_available():
              yield self._fallback_response(messages)
              return
@@ -97,11 +137,16 @@ class LLMService:
                  top_p=0.9,
                  stream=True,
              ):
-                 token = chunk.choices[0].delta.content or ""
                  partial += token
                  yield partial
          except Exception as e:
-             yield f"⚠️ LLM API error: {str(e)}\n\nPlease check that HF_TOKEN is set correctly in the Space settings and the model {LLM_MODEL} is accessible."

      def generate(self, messages, temperature=0.7, max_tokens=1024):
          """Non-streaming generation. Returns complete response."""
@@ -118,68 +163,89 @@ class LLMService:
              )
              return response.choices[0].message.content
          except Exception as e:
-             return f"⚠️ LLM API error: {str(e)}"

      def _fallback_response(self, messages):
-         """Knowledge-base powered fallback when LLM is not available."""
          user_msg = ""
          for m in reversed(messages):
-             if m["role"] == "user":
-                 user_msg = m["content"].lower()
                  break

-         # Search knowledge base for relevant content
          response_parts = []

-         # Check glossary
          for term, definition in GLOSSARY.items():
-             if term.lower() in user_msg or any(w in user_msg for w in term.lower().split()):
                  response_parts.append(f"**{term}**: {definition}")

-         # Check workflows
          for wf_key, wf in WORKFLOWS.items():
-             if any(keyword in user_msg for keyword in wf["name"].lower().split()):
-                 response_parts.append(f"\n### {wf['name']}\n")
                  for step in wf["steps"][:3]:
                      response_parts.append(f"**Step {step['step']}: {step['name']}**\n{step['description']}")
                  break

-         # Check misconceptions
          for misc in COMMON_MISCONCEPTIONS:
-             keywords = misc["misconception"].lower().split()
-             if any(w in user_msg for w in keywords if len(w) > 4):
                  response_parts.append(f"\n⚠️ **Common Misconception**: {misc['misconception']}\n\n✅ **Correction**: {misc['correction']}")
                  break

          if response_parts:
              return "📚 *Responding from knowledge base (LLM not configured):*\n\n" + "\n\n".join(response_parts)
-         else:
-             return (
-                 "⚠️ **LLM is not configured.** To enable AI-powered responses:\n\n"
-                 "1. Go to Space Settings → Repository Secrets\n"
-                 "2. Add `HF_TOKEN` with your HuggingFace API token\n"
-                 "3. The token needs access to inference API\n\n"
-                 "Currently showing knowledge base results only. "
-                 "Try asking about specific topics like 'DESeq2', 'variant calling', or 'FASTQ quality'."
-             )


  class RAGService:
-     """Document retrieval service with embedding-based search."""

      def __init__(self):
          self.embedder = None
-         if HAS_ST:
-             try:
-                 self.embedder = SentenceTransformer(EMBED_MODEL)
-             except Exception as e:
-                 print(f"Warning: Could not load embedding model: {e}")
-
-         # Pre-build knowledge base index
-         self.kb_chunks, self.kb_metadata = self._build_kb_index()
          self.kb_embeddings = None
-         if self.embedder and self.kb_chunks:
-             try:
                  self.kb_embeddings = self.embedder.encode(
                      self.kb_chunks,
                      convert_to_numpy=True,
@@ -187,20 +253,23 @@ class RAGService:
                      show_progress_bar=False,
                      batch_size=32,
                  )
-             except Exception as e:
-                 print(f"Warning: Could not embed knowledge base: {e}")

      def _build_kb_index(self):
-         """Build searchable chunks from the knowledge base."""
          chunks = []
          metadata = []

-         # Index glossary terms
          for term, definition in GLOSSARY.items():
              chunks.append(f"{term}: {definition}")
              metadata.append({"source": "glossary", "topic": term, "type": "definition"})

-         # Index workflow steps
          for wf_key, wf in WORKFLOWS.items():
              for step in wf["steps"]:
                  step_text = f"{wf['name']} - Step {step['step']}: {step['name']}. {step['description']}"
@@ -217,7 +286,7 @@ class RAGService:
                      "workflow": wf_key
                  })

-         # Index misconceptions
          for misc in COMMON_MISCONCEPTIONS:
              text = f"Misconception: {misc['misconception']} Correction: {misc['correction']}"
              chunks.append(text)
@@ -228,17 +297,19 @@ class RAGService:
                  "severity": misc["severity"]
              })

-         # Index domain taxonomy
          for key, domain in DOMAIN_TAXONOMY.items():
-             text = f"{domain['name']} covers these subtopics: {', '.join(domain['subtopics'])}."
              chunks.append(text)
              metadata.append({"source": "taxonomy", "topic": key, "type": "domain_overview"})

-         return chunks, metadata

      def search(self, query, top_k=TOP_K_RETRIEVAL, user_chunks=None, user_embeddings=None):
-         """Search the knowledge base and optional user-uploaded content."""
-         if not self.embedder:
              return self._keyword_search(query, top_k)

          try:
@@ -250,40 +321,38 @@ class RAGService:

              results = []

-             # Search knowledge base
              if self.kb_embeddings is not None and len(self.kb_embeddings) > 0:
                  kb_scores = np.dot(query_embedding, self.kb_embeddings.T)[0]
                  top_indices = np.argsort(kb_scores)[::-1][:top_k]
                  for idx in top_indices:
-                     if kb_scores[idx] > 0.2:  # minimum relevance threshold
                          results.append({
                              "text": self.kb_chunks[idx],
                              "score": float(kb_scores[idx]),
                              "metadata": self.kb_metadata[idx]
                          })

-             # Search user-uploaded content
              if user_chunks and user_embeddings is not None and len(user_embeddings) > 0:
                  user_scores = np.dot(query_embedding, user_embeddings.T)[0]
                  top_user = np.argsort(user_scores)[::-1][:top_k]
                  for idx in top_user:
-                     if user_scores[idx] > 0.2:
                          results.append({
                              "text": user_chunks[idx],
                              "score": float(user_scores[idx]),
-                             "metadata": {"source": "uploaded_document", "type": "user_content"}
                          })

-             # Sort by score and return top_k
              results.sort(key=lambda x: x["score"], reverse=True)
              return results[:top_k]
-
          except Exception as e:
-             print(f"Embedding search error: {e}")
              return self._keyword_search(query, top_k)

      def _keyword_search(self, query, top_k=3):
-         """Fallback keyword-based search."""
          query_words = set(query.lower().split())
          scored = []
          for i, chunk in enumerate(self.kb_chunks):
@@ -299,8 +368,8 @@ class RAGService:
          return scored[:top_k]

      def embed_chunks(self, chunks):
-         """Embed a list of text chunks. Returns numpy array or None."""
-         if not self.embedder or not chunks:
              return None
          try:
              return self.embedder.encode(
@@ -308,28 +377,28 @@ class RAGService:
                  convert_to_numpy=True,
                  normalize_embeddings=True,
                  show_progress_bar=False,
-                 batch_size=32,
              )
-         except Exception:
              return None


  class DocumentParser:
-     """Parse uploaded documents into text chunks."""

      @staticmethod
      def parse_file(filepath):
          """Extract text from uploaded file."""
          if filepath is None:
              return "", []
-
          filepath = str(filepath)
          ext = Path(filepath).suffix.lower()

          try:
              if ext == ".pdf" and HAS_FITZ:
                  return DocumentParser._parse_pdf(filepath)
-             elif ext in (".txt", ".md", ".csv", ".tsv", ".fasta", ".fa", ".fastq", ".fq", ".vcf", ".bed", ".gff", ".gtf", ".sam"):
                  return DocumentParser._parse_text(filepath)
              else:
                  return f"Unsupported file type: {ext}", []
@@ -341,8 +410,7 @@ class DocumentParser:
          doc = fitz.open(filepath)
          pages = []
          for page_num in range(len(doc)):
-             page = doc[page_num]
-             text = page.get_text()
              if text.strip():
                  pages.append(text)
          doc.close()
@@ -370,155 +438,146 @@ class DocumentParser:
          return chunks


- # ============================================================================
- # INITIALIZE SERVICES
- # ============================================================================
-
- print("🧬 Initializing BB Tutor services...")
  llm_service = LLMService()
  rag_service = RAGService()
  doc_parser = DocumentParser()
- print(f"  LLM available: {llm_service.is_available()}")
- print(f"  RAG embedder available: {rag_service.embedder is not None}")
- print(f"  Knowledge base chunks: {len(rag_service.kb_chunks)}")
- print("✅ BB Tutor services initialized!")


  # ============================================================================
- # MODULE 1: ASK THE TUTOR
  # ============================================================================

- def tutor_respond(message, history, system_prompt, temperature, max_tokens, rag_store):
-     """Main tutor chat handler with RAG-augmented responses."""
-     if not message.strip():
-         yield ""
-         return

-     # Retrieve relevant context
-     user_chunks = rag_store.get("chunks", []) if isinstance(rag_store, dict) else []
-     user_embeddings = rag_store.get("embeddings") if isinstance(rag_store, dict) else None

-     rag_results = rag_service.search(
-         message,
-         top_k=TOP_K_RETRIEVAL,
-         user_chunks=user_chunks,
-         user_embeddings=user_embeddings
-     )

-     # Build context from retrieved chunks
-     context_parts = []
-     if rag_results:
-         context_parts.append("RELEVANT KNOWLEDGE BASE CONTEXT:")
-         for r in rag_results:
-             source = r["metadata"].get("source", "unknown")
-             context_parts.append(f"[Source: {source}] {r['text']}")

-     # Build messages
-     messages = [{"role": "system", "content": system_prompt}]
-     if context_parts:
-         messages.append({
-             "role": "system",
-             "content": "\n".join(context_parts)
-         })

-     # Add conversation history
-     for h in history:
-         messages.append(h)

      messages.append({"role": "user", "content": message})

-     # Stream response
-     for partial in llm_service.stream_chat(messages, temperature=temperature, max_tokens=max_tokens):
          yield partial

- # ============================================================================
- # MODULE 2: UPLOAD AND EXPLAIN
- # ============================================================================

- def process_upload(file, rag_store):
-     """Process an uploaded file: extract text, chunk, embed, explain."""
      if file is None:
-         return "Please upload a file first.", "", rag_store

      full_text, chunks = doc_parser.parse_file(file)

      if not chunks:
-         return "Could not extract text from the uploaded file.", full_text[:2000] if full_text else "", rag_store

-     # Embed the chunks
      embeddings = rag_service.embed_chunks(chunks)

-     # Update RAG store with uploaded content
-     new_store = dict(rag_store) if isinstance(rag_store, dict) else {"chunks": [], "embeddings": None}
-     new_store["chunks"] = chunks
-     if embeddings is not None:
-         new_store["embeddings"] = embeddings
-
-     # Generate explanation
-     preview = full_text[:3000] if len(full_text) > 3000 else full_text
-     messages = [
          {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
-         {"role": "user", "content": f"Please analyze and explain this uploaded content:\n\n{preview}"}
      ]
-     explanation = llm_service.generate(messages, temperature=0.5, max_tokens=1500)

-     # Add stats
-     stats = f"📊 **Document Stats:** {len(chunks)} chunks, ~{len(full_text.split())} words extracted\n\n---\n\n"

-     return stats + explanation, full_text[:5000], new_store

- def upload_chat_respond(message, history, rag_store):
-     """Chat about uploaded documents with RAG context."""
-     if not message.strip():
          yield ""
          return

-     user_chunks = rag_store.get("chunks", []) if isinstance(rag_store, dict) else []
-     user_embeddings = rag_store.get("embeddings") if isinstance(rag_store, dict) else None

      if not user_chunks:
-         yield "Please upload a document first using the upload panel above, then ask questions about it."
          return

-     # Retrieve relevant chunks from uploaded doc
-     rag_results = rag_service.search(
-         message, top_k=4,
-         user_chunks=user_chunks,
-         user_embeddings=user_embeddings
-     )

-     context = "CONTEXT FROM UPLOADED DOCUMENT:\n"
-     for r in rag_results:
-         context += f"\n{r['text']}\n"

      messages = [
          {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
-         {"role": "system", "content": context},
      ]
-     for h in history:
-         messages.append(h)
      messages.append({"role": "user", "content": message})

      for partial in llm_service.stream_chat(messages, temperature=0.5, max_tokens=1024):
          yield partial


- # ============================================================================
- # MODULE 3: QUIZ ME
- # ============================================================================

- def generate_quiz(topic, quiz_type, num_questions, difficulty, rag_store):
-     """Generate a quiz on a bioinformatics topic."""
      if not topic:
-         return "Please select or enter a topic first.", ""

-     # Get relevant context
      rag_results = rag_service.search(topic, top_k=3)
      context = ""
      if rag_results:
-         context = "Use this reference material:\n" + "\n".join(r["text"] for r in rag_results)

      template_key = {
          "Multiple Choice (MCQ)": "mcq",
@@ -527,71 +586,56 @@ def generate_quiz(topic, quiz_type, num_questions, difficulty, rag_store):
      }.get(quiz_type, "mcq")

      quiz_prompt = QUIZ_TEMPLATES[template_key].format(
-         n=int(num_questions),
-         topic=topic,
-         difficulty=difficulty
      )

-     messages = [
-         {"role": "system", "content": SYSTEM_PROMPTS["quiz_me"]},
-     ]
      if context:
          messages.append({"role": "system", "content": context})
      messages.append({"role": "user", "content": quiz_prompt})

      response = llm_service.generate(messages, temperature=0.8, max_tokens=2000)

-     # Format nicely
      formatted = f"## 🧠 {topic} Quiz — {difficulty}\n\n"
-     formatted += f"*Type: {quiz_type} | Questions: {int(num_questions)}*\n\n---\n\n"
      formatted += response

-     # Store answer key
-     answer_key = response
-
-     return formatted, answer_key


  def check_quiz_answers(user_answers, answer_key):
-     """Provide feedback on quiz answers."""
-     if not user_answers.strip():
-         return "Please enter your answers first."
      if not answer_key:
-         return "Please generate a quiz first."

      messages = [
-         {"role": "system", "content": "You are a bioinformatics tutor grading a quiz. Compare the student's answers to the correct answers. For each answer: mark it ✅ correct or ❌ incorrect, explain why, and provide the correct answer if wrong. Be encouraging but accurate. Give a final score."},
-         {"role": "user", "content": f"QUIZ AND ANSWER KEY:\n{answer_key}\n\nSTUDENT'S ANSWERS:\n{user_answers}\n\nPlease grade each answer:"}
      ]
-
      return llm_service.generate(messages, temperature=0.3, max_tokens=1500)


- # ============================================================================
- # MODULE 4: BUILD A LESSON
- # ============================================================================

  def generate_lesson(topic, level, include_exercises, include_quiz):
-     """Generate a structured lesson on a bioinformatics topic."""
      if not topic:
-         return "Please select or enter a topic."

-     # Get relevant context
      rag_results = rag_service.search(topic, top_k=4)
      context = ""
      if rag_results:
-         context = "Reference material:\n" + "\n".join(r["text"] for r in rag_results)

      prompt = LESSON_TEMPLATE.format(topic=topic, level=level)
-
      if include_exercises:
          prompt += "\n\nInclude 2-3 practical exercises with clear instructions."
      if include_quiz:
-         prompt += "\n\nInclude a 5-question self-assessment quiz at the end (with answers)."

-     messages = [
-         {"role": "system", "content": SYSTEM_PROMPTS["build_lesson"]},
-     ]
      if context:
          messages.append({"role": "system", "content": context})
      messages.append({"role": "user", "content": prompt})
@@ -599,148 +643,107 @@ def generate_lesson(topic, level, include_exercises, include_quiz):
      return llm_service.generate(messages, temperature=0.7, max_tokens=3000)


- # ============================================================================
- # MODULE 5: WORKFLOW COACH
- # ============================================================================

  def workflow_respond(message, history, selected_workflow, temperature):
-     """Workflow coaching chat handler."""
-     if not message.strip():
          yield ""
          return

-     # Get workflow context
      workflow_context = ""
      for wf_key, wf in WORKFLOWS.items():
          if wf["name"] in selected_workflow or selected_workflow.lower() in wf["name"].lower():
              workflow_context = f"WORKFLOW REFERENCE: {wf['name']}\n\n"
              for step in wf["steps"]:
                  workflow_context += f"Step {step['step']}: {step['name']}\n"
-                 workflow_context += f"  Description: {step['description']}\n"
-                 workflow_context += f"  Tools: {', '.join(step.get('tools', []))}\n"
                  if step.get("common_mistakes"):
-                     workflow_context += f"  Common mistakes: {'; '.join(step['common_mistakes'])}\n"
                  workflow_context += "\n"
              break

-     # Also search RAG
      rag_results = rag_service.search(message, top_k=2)
      if rag_results:
-         workflow_context += "\nADDITIONAL CONTEXT:\n" + "\n".join(r["text"] for r in rag_results)

-     messages = [
-         {"role": "system", "content": SYSTEM_PROMPTS["workflow_coach"]},
-     ]
      if workflow_context:
          messages.append({"role": "system", "content": workflow_context})
-
-     for h in history:
-         messages.append(h)
      messages.append({"role": "user", "content": message})

-     for partial in llm_service.stream_chat(messages, temperature=temperature, max_tokens=1500):
          yield partial


- # ============================================================================
- # MODULE 6: PAPER TO LESSON
- # ============================================================================

- def paper_to_lesson_respond(message, history, output_format, rag_store):
-     """Convert paper content into teaching material."""
-     if not message.strip():
          yield ""
          return

-     user_chunks = rag_store.get("chunks", []) if isinstance(rag_store, dict) else []
-     user_embeddings = rag_store.get("embeddings") if isinstance(rag_store, dict) else None

      context = ""
      if user_chunks:
-         rag_results = rag_service.search(
-             message, top_k=4,
-             user_chunks=user_chunks,
-             user_embeddings=user_embeddings
-         )
          if rag_results:
-             context = "PAPER CONTENT:\n" + "\n".join(r["text"] for r in rag_results)

      format_instruction = {
          "Lesson Plan": "Create a structured lesson plan with learning objectives, sections, and exercises.",
-         "Slide Outline": "Create a slide-by-slide outline with key points for each slide (title + 3-5 bullet points per slide).",
          "Study Notes": "Create concise study notes highlighting key methods, tools, and findings.",
          "Quiz Questions": "Generate 5-10 quiz questions based on the paper's methods and findings.",
      }.get(output_format, "Create a structured lesson plan.")

-     messages = [
-         {"role": "system", "content": SYSTEM_PROMPTS["paper_to_lesson"]},
-     ]
      if context:
          messages.append({"role": "system", "content": context})

-     for h in history:
-         messages.append(h)
-
-     full_message = f"{message}\n\nOUTPUT FORMAT: {format_instruction}"
-     messages.append({"role": "user", "content": full_message})

      for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=2500):
          yield partial


- # ============================================================================
- # MODULE 7: VIVA PRACTICE
- # ============================================================================

  def viva_respond(message, history, topic, difficulty):
-     """Viva voce practice session handler."""
-     if not message.strip():
          yield ""
          return

-     # Get topic context
      rag_results = rag_service.search(f"{topic} {message}", top_k=3)
      context = ""
      if rag_results:
-         context = "REFERENCE MATERIAL:\n" + "\n".join(r["text"] for r in rag_results)

      messages = [
          {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]},
-         {"role": "system", "content": f"VIVA TOPIC: {topic}\nDIFFICULTY LEVEL: {difficulty}\n\n{context}"},
      ]
-
-     for h in history:
-         messages.append(h)
      messages.append({"role": "user", "content": message})

      for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=1000):
          yield partial


- def start_viva(topic, difficulty):
-     """Generate the opening viva question."""
-     if not topic:
-         return "Please select a topic to begin the viva."
-
-     rag_results = rag_service.search(topic, top_k=2)
-     context = ""
-     if rag_results:
-         context = "\n".join(r["text"] for r in rag_results)
-
-     messages = [
-         {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]},
-         {"role": "system", "content": f"Topic: {topic}\nDifficulty: {difficulty}\n\nReference: {context}"},
-         {"role": "user", "content": f"I'm ready for my viva on {topic}. Please start with your first question."}
-     ]
-
-     return llm_service.generate(messages, temperature=0.7, max_tokens=500)
-
-
  # ============================================================================
  # GRADIO APP ASSEMBLY
  # ============================================================================

- # Custom CSS
  CUSTOM_CSS = """
  .main-header {
      text-align: center;
@@ -750,8 +753,8 @@ CUSTOM_CSS = """
      margin-bottom: 20px;
      color: white;
  }
- .main-header h1 { color: white; font-size: 2em; margin-bottom: 5px; }
- .main-header p { color: #ecf0f1; font-size: 1.1em; }
  .module-info {
      background: #f0f9ff;
      border-left: 4px solid #2e86c1;
@@ -767,22 +770,38 @@ CUSTOM_CSS = """
      border-radius: 0 8px 8px 0;
      font-size: 0.9em;
  }
  """


  def build_app():
-     with gr.Blocks(title="Bioinformatics with BB Tutor") as demo:

-         # Shared state across all tabs
          rag_store = gr.State({"chunks": [], "embeddings": None})

-         # ── Header ────────────────────────────────────────────────────────
-         gr.HTML("""
          <div class="main-header">
              <h1>🧬 Bioinformatics with BB Tutor</h1>
-             <p>Your AI-powered bioinformatics teaching assistant</p>
              <p style="font-size: 0.85em; opacity: 0.9;">
                  RNA-seq · Exome · Genome · Microbiome · Variants · Molecular Genetics · scRNA-seq · ATAC-seq · ChIP-seq · and more
              </p>
          </div>
          """)

@@ -792,7 +811,7 @@ def build_app():
          # TAB 1: ASK THE TUTOR
          # ══════════════════════════════════════════════════════════════
          with gr.Tab("🧬 Ask the Tutor", id="ask"):
-             gr.HTML('<div class="module-info">💡 Ask any bioinformatics question. The tutor uses a curated knowledge base to provide accurate, educational answers with proper context.</div>')

              gr.ChatInterface(
                  fn=tutor_respond,
@@ -801,38 +820,32 @@ def build_app():
                      gr.Textbox(
                          value=SYSTEM_PROMPTS["ask_tutor"],
                          label="System Prompt",
-                         lines=3,
-                         visible=True,
-                     ),
-                     gr.Slider(
-                         minimum=0.1, maximum=1.5, value=0.7, step=0.1,
-                         label="Temperature (lower = more focused, higher = more creative)"
-                     ),
-                     gr.Slider(
-                         minimum=256, maximum=4096, value=1024, step=256,
-                         label="Max Response Length (tokens)"
                      ),
                      rag_store,
                  ],
-                 additional_inputs_accordion=gr.Accordion("⚙️ Advanced Settings", open=False),
                  examples=[
-                     "What is the difference between DESeq2 and edgeR for differential expression analysis?",
-                     "Explain the GATK Best Practices variant calling pipeline step by step.",
-                     "What is the difference between alpha and beta diversity in microbiome analysis?",
                      "Why should I use adjusted p-values instead of raw p-values?",
-                     "Explain the single-cell RNA-seq analysis workflow from raw data to cell type annotation.",
-                     "What is BQSR and why is it important in variant calling?",
                  ],
-                 save_history=True,
              )
-
-             gr.HTML('<div class="safety-notice">⚠️ <strong>Educational use only.</strong> This tutor provides learning support, not clinical interpretations. Always consult qualified professionals for clinical genomics decisions.</div>')

          # ══════════════════════════════════════════════════════════════
-         # TAB 2: UPLOAD AND EXPLAIN
          # ══════════════════════════════════════════════════════════════
          with gr.Tab("📄 Upload & Explain", id="upload"):
-             gr.HTML('<div class="module-info">📄 Upload bioinformatics documents (PDFs, text files, VCFs, FASTA, etc.) and get AI-powered explanations. Uploaded content becomes available for Q&A across all modules.</div>')

              with gr.Row():
                  with gr.Column(scale=1):
@@ -840,11 +853,12 @@ def build_app():
                      label="Upload Document",
                      file_types=[".pdf", ".txt", ".md", ".csv", ".tsv",
                                  ".fasta", ".fa", ".fastq", ".vcf", ".bed",
-                                 ".gff", ".gtf", ".sam"],
                      file_count="single",
                      type="filepath",
                  )
-                 process_btn = gr.Button("🔍 Analyze Document", variant="primary", size="lg")

                  with gr.Column(scale=2):
                      explanation_output = gr.Markdown(label="Analysis & Explanation")
@@ -858,14 +872,14 @@ def build_app():
                  outputs=[explanation_output, raw_text_output, rag_store],
              )

-             gr.Markdown("### 💬 Ask Questions About Your Document")
              gr.ChatInterface(
                  fn=upload_chat_respond,
                  type="messages",
                  additional_inputs=[rag_store],
                  additional_inputs_accordion=gr.Accordion("", open=False, visible=False),
                  examples=[
-                     "Summarize the key methods used in this paper.",
                      "What bioinformatics tools are mentioned?",
                      "Explain the main findings in simple terms.",
                      "What are the limitations of this analysis?",
@@ -876,37 +890,29 @@ def build_app():
          # TAB 3: QUIZ ME
          # ══════════════════════════════════════════════════════════════
          with gr.Tab("❓ Quiz Me", id="quiz"):
-             gr.HTML('<div class="module-info">🧠 Test your knowledge with auto-generated quizzes. Choose a topic, format, and difficulty level.</div>')

              with gr.Row():
-                 with gr.Column(scale=2):
-                     quiz_topic = gr.Dropdown(
-                         choices=TOPIC_CHOICES,
-                         label="Select Topic",
-                         allow_custom_value=True,
-                         value="RNA-seq: Differential Expression (DESeq2)"
-                     )
-                 with gr.Column(scale=1):
-                     quiz_type = gr.Radio(
-                         choices=["Multiple Choice (MCQ)", "True/False", "Short Answer"],
-                         value="Multiple Choice (MCQ)",
-                         label="Question Format"
-                     )

              with gr.Row():
-                 with gr.Column(scale=1):
-                     quiz_difficulty = gr.Radio(
-                         choices=DIFFICULTY_LEVELS,
-                         value="Intermediate",
-                         label="Difficulty"
-                     )
-                 with gr.Column(scale=1):
-                     num_questions = gr.Slider(
-                         minimum=1, maximum=10, value=5, step=1,
-                         label="Number of Questions"
-                     )
-                 with gr.Column(scale=1):
-                     generate_quiz_btn = gr.Button("🎲 Generate Quiz", variant="primary", size="lg")

              quiz_output = gr.Markdown(label="Generated Quiz")
              answer_key_state = gr.State("")
@@ -919,12 +925,14 @@ def build_app():

              gr.Markdown("---")
              gr.Markdown("### ✍️ Submit Your Answers")
-             user_answers = gr.Textbox(
-                 label="Enter your answers (e.g., '1: A, 2: B, 3: True...')",
-                 lines=5,
-                 placeholder="Type your answers here..."
-             )
-             check_btn = gr.Button("✅ Check Answers", variant="primary")
              feedback_output = gr.Markdown(label="Feedback")

              check_btn.click(
@@ -937,27 +945,25 @@ def build_app():
          # TAB 4: BUILD A LESSON
          # ══════════════════════════════════════════════════════════════
          with gr.Tab("📚 Build a Lesson", id="lesson"):
-             gr.HTML('<div class="module-info">📚 Generate structured lessons with learning objectives, explanations, exercises, and quizzes for any bioinformatics topic.</div>')

              with gr.Row():
-                 with gr.Column(scale=2):
-                     lesson_topic = gr.Dropdown(
-                         choices=TOPIC_CHOICES,
-                         label="Lesson Topic",
-                         allow_custom_value=True,
-                         value="RNA-seq: Differential Expression (DESeq2)"
-                     )
-                 with gr.Column(scale=1):
-                     lesson_level = gr.Radio(
-                         choices=DIFFICULTY_LEVELS,
-                         value="Intermediate",
-                         label="Student Level"
-                     )

              with gr.Row():
-                 include_exercises = gr.Checkbox(label="Include Practical Exercises", value=True)
-                 include_quiz = gr.Checkbox(label="Include Self-Assessment Quiz", value=True)
-                 generate_lesson_btn = gr.Button("📝 Generate Lesson", variant="primary", size="lg")

              lesson_output = gr.Markdown(label="Generated Lesson")

@@ -971,7 +977,7 @@ def build_app():
          # TAB 5: WORKFLOW COACH
          # ══════════════════════════════════════════════════════════════
          with gr.Tab("🔬 Workflow Coach", id="workflow"):
-             gr.HTML('<div class="module-info">🔬 Get step-by-step guidance through bioinformatics analysis pipelines. Select a workflow and ask questions about any step.</div>')

              workflow_selector = gr.Dropdown(
                  choices=WORKFLOW_CHOICES,
@@ -985,19 +991,16 @@ def build_app():
                  type="messages",
                  additional_inputs=[
                      workflow_selector,
-                     gr.Slider(
-                         minimum=0.1, maximum=1.5, value=0.7, step=0.1,
-                         label="Temperature"
-                     ),
                  ],
-                 additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=False),
                  examples=[
-                     "Walk me through the complete pipeline from raw FASTQ to differential expression results.",
-                     "I'm at the alignment step. What should I check before moving to counting?",
                      "My mapping rate is only 45%. What could be wrong?",
-                     "How do I choose between STAR and HISAT2 for RNA-seq alignment?",
-                     "What parameters should I use for GATK HaplotypeCaller on exome data?",
-                     "How do I set the truncation parameters for DADA2 in QIIME2?",
                  ],
              )

@@ -1005,7 +1008,7 @@ def build_app():
          # TAB 6: PAPER TO LESSON
          # ══════════════════════════════════════════════════════════════
          with gr.Tab("📰 Paper to Lesson", id="paper"):
-             gr.HTML('<div class="module-info">📰 Convert research papers into teaching material. Upload a paper first in the "Upload & Explain" tab, then use this module to generate lessons, slide outlines, and quiz questions from it.</div>')

              output_format = gr.Radio(
                  choices=["Lesson Plan", "Slide Outline", "Study Notes", "Quiz Questions"],
@@ -1016,16 +1019,13 @@ def build_app():
              gr.ChatInterface(
                  fn=paper_to_lesson_respond,
                  type="messages",
-                 additional_inputs=[
-                     output_format,
-                     rag_store,
-                 ],
                  additional_inputs_accordion=gr.Accordion("", open=False, visible=False),
                  examples=[
                      "Convert this paper into a 45-minute lecture plan.",
-                     "Create a slide outline covering the key methods in this paper.",
-                     "Generate study notes highlighting the bioinformatics methods used.",
-                     "Create quiz questions testing understanding of this paper's methodology.",
                  ],
              )

@@ -1033,7 +1033,7 @@ def build_app():
          # TAB 7: VIVA PRACTICE
          # ══════════════════════════════════════════════════════════════
          with gr.Tab("🎓 Viva Practice", id="viva"):
-             gr.HTML('<div class="module-info">🎓 Practice for oral examinations. The AI examiner asks probing questions, evaluates your answers, and pushes you to demonstrate deeper understanding.</div>')

              with gr.Row():
                  viva_topic = gr.Dropdown(
@@ -1045,31 +1045,28 @@ def build_app():
                  viva_difficulty = gr.Radio(
                      choices=DIFFICULTY_LEVELS,
                      value="Intermediate",
-                     label="Exam Difficulty"
                  )

              gr.ChatInterface(
                  fn=viva_respond,
                  type="messages",
-                 additional_inputs=[
-                     viva_topic,
-                     viva_difficulty,
-                 ],
-                 additional_inputs_accordion=gr.Accordion("⚙️ Settings", open=False),
                  examples=[
-                     "I'm ready for my viva. Please start with your first question.",
-                     "Can we focus on the statistical aspects of RNA-seq analysis?",
                      "Ask me about variant calling and interpretation.",
-                     "Test my understanding of microbiome diversity analysis.",
                  ],
              )

-         # ── Footer ────────────────────────────────────────────────────────
          gr.HTML("""
          <div style="text-align: center; padding: 20px; margin-top: 20px; border-top: 1px solid #e0e0e0; color: #666; font-size: 0.85em;">
              <p><strong>Bioinformatics with BB Tutor</strong> — Educational AI Assistant</p>
-             <p>⚠️ For educational purposes only. Not for clinical use. Always verify critical information with primary sources.</p>
-             <p>Domains: RNA-seq · Exome · Genome · Microbiome · Variants · Molecular Genetics · scRNA-seq · ATAC-seq · ChIP-seq · Methylation · Small RNA · Targeted Panels · Long-read · Spatial Transcriptomics · Multi-omics</p>
          </div>
          """)

  """
  Bioinformatics with BB Tutor — Complete Application
+ A production bioinformatics teaching assistant with 7 modules.
+
+ Architecture:
+ - Backend: LLMService (HuggingFace InferenceClient), RAGService (sentence-transformers),
+   DocumentParser (PyMuPDF + text), knowledge_base (domain content)
+ - Frontend: 7 Gradio tabs with ChatInterface, file upload, quiz generation, lesson building
+ - Data flow: User query → RAG retrieval → LLM with context → streaming response
+ - Shared state: rag_store (gr.State) holds uploaded document chunks + embeddings across tabs
  """

  import gradio as gr
  import os
  import re
  import time
  from pathlib import Path

  # ── Conditional imports with fallbacks ────────────────────────────────────────
      HAS_FITZ = True
  except ImportError:
      HAS_FITZ = False
+     print("Warning: PyMuPDF not available. PDF parsing disabled.")

  try:
      from sentence_transformers import SentenceTransformer
      HAS_ST = True
  except ImportError:
      HAS_ST = False
+     print("Warning: sentence-transformers not available. Embedding search disabled.")

  try:
      from huggingface_hub import InferenceClient
      HAS_HF = True
  except ImportError:
      HAS_HF = False
+     print("Warning: huggingface_hub not available. LLM service disabled.")

  # ── Import knowledge base ────────────────────────────────────────────────────
  from knowledge_base import (
 
  # CONFIGURATION
  # ============================================================================

  LLM_MODEL = os.environ.get("LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
  EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
  HF_TOKEN = os.environ.get("HF_TOKEN", None)

+ CHUNK_SIZE = 400
+ CHUNK_OVERLAP = 60
  TOP_K_RETRIEVAL = 3


  # ============================================================================
+ # BACKEND SERVICES — Singleton Pattern
  # ============================================================================

  class LLMService:
+     """Lazy-initialized LLM inference service."""
+
+     _instance = None
+     _initialized = False
+
+     def __new__(cls):
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
+         return cls._instance

      def __init__(self):
+         if LLMService._initialized:
+             return
+         LLMService._initialized = True
          self.client = None
+         self._try_init()
+
+     def _try_init(self):
+         if not HAS_HF:
+             print("LLMService: huggingface_hub not available")
+             return
+         if not HF_TOKEN:
+             print("LLMService: HF_TOKEN not set in environment")
+             return
+         try:
+             self.client = InferenceClient(
+                 model=LLM_MODEL,
+                 token=HF_TOKEN,
+                 timeout=120,
+             )
+             print("LLMService: Initialized successfully")
+         except Exception as e:
+             print(f"LLMService: Failed to initialize: {e}")
+             self.client = None

      def is_available(self):
          return self.client is not None

+     def _format_messages(self, messages, system_prompt=None, rag_context=None):
+         """Build message list with optional system prompt and RAG context."""
+         formatted = []
+         if system_prompt:
+             formatted.append({"role": "system", "content": system_prompt})
+         if rag_context:
+             formatted.append({"role": "system", "content": rag_context})
+         # Add conversation history (already formatted)
+         for m in messages:
+             if isinstance(m, dict) and "role" in m:
+                 formatted.append(m)
+             elif isinstance(m, (list, tuple)) and len(m) >= 2:
+                 # Handle tuple format (text, response)
+                 formatted.append({"role": "user", "content": str(m[0])})
+                 if len(m) > 1 and m[1]:
+                     formatted.append({"role": "assistant", "content": str(m[1])})
+         return formatted
+
      def stream_chat(self, messages, temperature=0.7, max_tokens=1024):
+         """Stream chat completion. Yields partial response strings."""
          if not self.is_available():
              yield self._fallback_response(messages)
              return

                  top_p=0.9,
                  stream=True,
              ):
+                 token = ""
+                 if hasattr(chunk, 'choices') and chunk.choices:
+                     choice = chunk.choices[0]
+                     if hasattr(choice, 'delta') and hasattr(choice.delta, 'content'):
+                         token = choice.delta.content or ""
                  partial += token
                  yield partial
          except Exception as e:
+             print(f"LLM stream error: {e}")
+             yield f"⚠️ LLM API error: {str(e)}\n\nPlease check your HF_TOKEN in Space settings and ensure the model '{LLM_MODEL}' is accessible.\n\nThe tutor is still functional using its knowledge base for many questions — try asking about specific bioinformatics topics!"

      def generate(self, messages, temperature=0.7, max_tokens=1024):
          """Non-streaming generation. Returns complete response."""
              )
              return response.choices[0].message.content
          except Exception as e:
+             print(f"LLM generate error: {e}")
+             return f"⚠️ LLM API error: {str(e)}\n\nThe tutor can still answer from its knowledge base. Try asking about specific concepts like 'DESeq2 normalization' or 'variant calling pipeline'!"

      def _fallback_response(self, messages):
+         """Knowledge-base fallback when LLM unavailable."""
+         # Extract user query
          user_msg = ""
          for m in reversed(messages):
+             if isinstance(m, dict) and m.get("role") == "user":
+                 user_msg = m.get("content", "").lower()
                  break

+         if not user_msg:
+             return "⚠️ **LLM not available.** Add HF_TOKEN in Space settings to enable AI responses.\n\nMeanwhile, the knowledge base covers: DESeq2, variant calling, microbiome diversity, scRNA-seq clustering, and more. Try asking a specific question!"
+
          response_parts = []

+         # Glossary match
          for term, definition in GLOSSARY.items():
+             if term.lower() in user_msg:
                  response_parts.append(f"**{term}**: {definition}")
+                 if len(response_parts) >= 3:
+                     break

+         # Workflow match
          for wf_key, wf in WORKFLOWS.items():
+             if any(kw in user_msg for kw in wf["name"].lower().split()):
+                 response_parts.append(f"\n### {wf['name']}")
                  for step in wf["steps"][:3]:
                      response_parts.append(f"**Step {step['step']}: {step['name']}**\n{step['description']}")
                  break

+         # Misconception match
          for misc in COMMON_MISCONCEPTIONS:
+             if misc["domain"].replace("_", " ") in user_msg or any(w in user_msg for w in misc["misconception"].lower().split()[:5]):
                  response_parts.append(f"\n⚠️ **Common Misconception**: {misc['misconception']}\n\n✅ **Correction**: {misc['correction']}")
                  break

          if response_parts:
              return "📚 *Responding from knowledge base (LLM not configured):*\n\n" + "\n\n".join(response_parts)
+         return (
+             "⚠️ **AI responses require HF_TOKEN.**\n\n"
+             "To enable full AI-powered responses:\n"
+             "1. Go to your HuggingFace account → Settings → Access Tokens\n"
+             "2. Create a token with 'inference-api' scope\n"
+             "3. Add it as a Secret named `HF_TOKEN` in this Space's Settings\n\n"
+             "The knowledge base can still answer many questions. Try asking about 'RNA-seq workflow', 'variant calling', or 'microbiome diversity'!"
+         )

  class RAGService:
+     """Document retrieval with lazy embedding model loading."""
+
+     _instance = None
+     _initialized = False
+
+     def __new__(cls):
+         if cls._instance is None:
+             cls._instance = super().__new__(cls)
+         return cls._instance

      def __init__(self):
+         if RAGService._initialized:
+             return
+         RAGService._initialized = True
          self.embedder = None
+         self.kb_chunks = []
+         self.kb_metadata = []
          self.kb_embeddings = None
+         self._build_kb_index()
+
+     def _ensure_embedder(self):
+         """Lazy load the embedding model."""
+         if self.embedder is not None:
+             return True
+         if not HAS_ST:
+             return False
+         try:
+             print("RAGService: Loading embedding model (this may take a moment)...")
+             self.embedder = SentenceTransformer(EMBED_MODEL)
+             print("RAGService: Embedding model loaded")
+             # Now embed the KB
+             if self.kb_chunks:
                  self.kb_embeddings = self.embedder.encode(
                      self.kb_chunks,
                      convert_to_numpy=True,
                      show_progress_bar=False,
                      batch_size=32,
                  )
+                 print(f"RAGService: KB embedded ({len(self.kb_chunks)} chunks)")
+             return True
+         except Exception as e:
+             print(f"RAGService: Failed to load embedder: {e}")
+             return False

      def _build_kb_index(self):
+         """Build searchable chunks from knowledge base."""
          chunks = []
          metadata = []

+         # Glossary
          for term, definition in GLOSSARY.items():
              chunks.append(f"{term}: {definition}")
              metadata.append({"source": "glossary", "topic": term, "type": "definition"})

+         # Workflows
          for wf_key, wf in WORKFLOWS.items():
              for step in wf["steps"]:
                  step_text = f"{wf['name']} - Step {step['step']}: {step['name']}. {step['description']}"
                      "workflow": wf_key
                  })

+         # Misconceptions
          for misc in COMMON_MISCONCEPTIONS:
              text = f"Misconception: {misc['misconception']} Correction: {misc['correction']}"
              chunks.append(text)
                  "severity": misc["severity"]
              })

+         # Taxonomy
          for key, domain in DOMAIN_TAXONOMY.items():
+             text = f"{domain['name']} covers: {', '.join(domain['subtopics'][:10])}"
              chunks.append(text)
              metadata.append({"source": "taxonomy", "topic": key, "type": "domain_overview"})

+         self.kb_chunks = chunks
+         self.kb_metadata = metadata
+         print(f"RAGService: Built KB with {len(chunks)} chunks")

      def search(self, query, top_k=TOP_K_RETRIEVAL, user_chunks=None, user_embeddings=None):
+         """Search KB and optionally user-uploaded content."""
+         if not self._ensure_embedder():
              return self._keyword_search(query, top_k)

          try:

              results = []

+             # Search KB
              if self.kb_embeddings is not None and len(self.kb_embeddings) > 0:
                  kb_scores = np.dot(query_embedding, self.kb_embeddings.T)[0]
                  top_indices = np.argsort(kb_scores)[::-1][:top_k]
                  for idx in top_indices:
+                     if kb_scores[idx] > 0.15:
                          results.append({
                              "text": self.kb_chunks[idx],
                              "score": float(kb_scores[idx]),
                              "metadata": self.kb_metadata[idx]
                          })

+             # Search user content
              if user_chunks and user_embeddings is not None and len(user_embeddings) > 0:
                  user_scores = np.dot(query_embedding, user_embeddings.T)[0]
                  top_user = np.argsort(user_scores)[::-1][:top_k]
                  for idx in top_user:
+                     if user_scores[idx] > 0.15:
                          results.append({
                              "text": user_chunks[idx],
                              "score": float(user_scores[idx]),
+                             "metadata": {"source": "uploaded", "type": "user_content"}
                          })

              results.sort(key=lambda x: x["score"], reverse=True)
              return results[:top_k]
          except Exception as e:
+             print(f"RAG search error: {e}")
              return self._keyword_search(query, top_k)

      def _keyword_search(self, query, top_k=3):
+         """Fallback keyword search."""
          query_words = set(query.lower().split())
          scored = []
          for i, chunk in enumerate(self.kb_chunks):
          return scored[:top_k]

      def embed_chunks(self, chunks):
+         """Embed text chunks. Returns numpy array or None."""
+         if not self._ensure_embedder() or not chunks:
              return None
          try:
              return self.embedder.encode(
                  convert_to_numpy=True,
                  normalize_embeddings=True,
                  show_progress_bar=False,
+                 batch_size=16,
              )
+         except Exception as e:
+             print(f"Embed chunks error: {e}")
              return None

  class DocumentParser:
+     """Parse uploaded documents."""

      @staticmethod
      def parse_file(filepath):
          """Extract text from uploaded file."""
          if filepath is None:
              return "", []
          filepath = str(filepath)
          ext = Path(filepath).suffix.lower()

          try:
              if ext == ".pdf" and HAS_FITZ:
                  return DocumentParser._parse_pdf(filepath)
+             elif ext in (".txt", ".md", ".csv", ".tsv", ".fasta", ".fa", ".fastq", ".fq", ".vcf", ".bed", ".gff", ".gtf", ".sam", ".bam"):
                  return DocumentParser._parse_text(filepath)
              else:
                  return f"Unsupported file type: {ext}", []
          doc = fitz.open(filepath)
          pages = []
          for page_num in range(len(doc)):
+             text = doc[page_num].get_text()
              if text.strip():
                  pages.append(text)
          doc.close()
          return chunks


+ # ── Initialize services (lightweight, no heavy downloads) ────────────────────
  llm_service = LLMService()
  rag_service = RAGService()
  doc_parser = DocumentParser()
+ print(f"🧬 BB Tutor initialized. LLM: {llm_service.is_available()}, Embeddings: {rag_service.embedder is not None}")

  # ============================================================================
+ # HANDLER FUNCTIONS — All receive/return plain Python values (not components)
  # ============================================================================

+ def _rag_context(query, user_chunks=None, user_embeddings=None):
+     """Retrieve RAG context as formatted string."""
+     results = rag_service.search(query, top_k=TOP_K_RETRIEVAL,
+                                  user_chunks=user_chunks, user_embeddings=user_embeddings)
+     if not results:
+         return ""
+     parts = ["RELEVANT KNOWLEDGE BASE CONTEXT:"]
+     for r in results:
+         source = r["metadata"].get("source", "kb")
+         parts.append(f"[{source}] {r['text'][:800]}")
+     return "\n".join(parts)
+
+
+ def _format_history(history):
+     """Convert Gradio history to OpenAI-style messages."""
+     messages = []
+     for h in history:
+         if isinstance(h, dict):
+             messages.append(h)
+         elif isinstance(h, (list, tuple)):
+             if len(h) >= 1 and h[0]:
+                 messages.append({"role": "user", "content": str(h[0])})
+             if len(h) >= 2 and h[1]:
+                 messages.append({"role": "assistant", "content": str(h[1])})
+     return messages


+ # ── Module 1: Ask the Tutor ───────────────────────────────────────────────────

+ def tutor_respond(message, history, system_prompt, temperature, max_tokens, rag_state):
+     """Ask the Tutor handler — streaming."""
+     if not message or not message.strip():
+         yield ""
+         return

+     rag_state = rag_state or {"chunks": [], "embeddings": None}
+     user_chunks = rag_state.get("chunks", [])
+     user_embeddings = rag_state.get("embeddings")

+     rag_ctx = _rag_context(message, user_chunks, user_embeddings)

+     messages = [{"role": "system", "content": system_prompt}]
+     if rag_ctx:
+         messages.append({"role": "system", "content": rag_ctx})
+     messages.extend(_format_history(history))
      messages.append({"role": "user", "content": message})

+     for partial in llm_service.stream_chat(messages, temperature, max_tokens):
          yield partial


+ # ── Module 2: Upload & Explain ──────────────────────────────────────────────
+
+ def process_upload(file, rag_state):
+     """Process uploaded file. Returns (explanation_markdown, raw_text, new_rag_state)."""
+     rag_state = rag_state or {"chunks": [], "embeddings": None}

      if file is None:
+         return "📁 Please upload a file first.", "", rag_state

      full_text, chunks = doc_parser.parse_file(file)

      if not chunks:
+         return "⚠️ Could not extract text from the uploaded file.", full_text[:2000] if full_text else "", rag_state

      embeddings = rag_service.embed_chunks(chunks)
+     new_state = {"chunks": chunks, "embeddings": embeddings}

+     preview = full_text[:2500] if len(full_text) > 2500 else full_text
+     msgs = [
          {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
+         {"role": "user", "content": f"Analyze and explain this bioinformatics document:\n\n{preview}"}
      ]
+     explanation = llm_service.generate(msgs, temperature=0.5, max_tokens=1500)

+     stats = f"📊 **Document Stats:** {len(chunks)} chunks, ~{len(full_text.split())} words | "
+     stats += f"File type: {Path(str(file)).suffix} | "
+     stats += "🤖 AI-powered" if llm_service.is_available() else "📚 Knowledge-base mode"
+     stats += "\n\n---\n\n"

+     return stats + explanation, full_text[:5000], new_state

+ def upload_chat_respond(message, history, rag_state):
+     """Chat about uploaded documents."""
+     if not message or not message.strip():
          yield ""
          return

+     rag_state = rag_state or {"chunks": [], "embeddings": None}
+     user_chunks = rag_state.get("chunks", [])
+     user_embeddings = rag_state.get("embeddings")

      if not user_chunks:
+         yield "📁 Please upload a document in the panel above, then ask questions about it.\n\nYour uploaded document will be indexed and searchable across all modules!"
          return

+     rag_results = rag_service.search(message, top_k=4, user_chunks=user_chunks, user_embeddings=user_embeddings)

+     ctx = "CONTEXT FROM UPLOADED DOCUMENT:\n"
+     if rag_results:
+         for r in rag_results:
+             ctx += f"\n{r['text'][:600]}\n"
+     else:
+         ctx += "(No highly relevant passages found — answering from general knowledge)\n"

      messages = [
          {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
+         {"role": "system", "content": ctx},
      ]
+     messages.extend(_format_history(history))
      messages.append({"role": "user", "content": message})

      for partial in llm_service.stream_chat(messages, temperature=0.5, max_tokens=1024):
          yield partial

+ # ── Module 3: Quiz Me ─────────────────────────────────────────────────────────

+ def generate_quiz(topic, quiz_type, num_questions, difficulty, rag_state):
+     """Generate quiz. Returns (quiz_markdown, answer_key_string)."""
      if not topic:
+         return "❓ Please select or enter a topic first.", ""

+     rag_state = rag_state or {"chunks": [], "embeddings": None}
      rag_results = rag_service.search(topic, top_k=3)
      context = ""
      if rag_results:
+         context = "Reference material:\n" + "\n".join(r["text"][:500] for r in rag_results)

      template_key = {
          "Multiple Choice (MCQ)": "mcq",
      }.get(quiz_type, "mcq")

      quiz_prompt = QUIZ_TEMPLATES[template_key].format(
+         n=int(num_questions), topic=topic, difficulty=difficulty
      )

+     messages = [{"role": "system", "content": SYSTEM_PROMPTS["quiz_me"]}]
      if context:
          messages.append({"role": "system", "content": context})
      messages.append({"role": "user", "content": quiz_prompt})

      response = llm_service.generate(messages, temperature=0.8, max_tokens=2000)

      formatted = f"## 🧠 {topic} Quiz — {difficulty}\n\n"
+     formatted += f"*Format: {quiz_type} | Questions: {int(num_questions)}*\n\n---\n\n"
      formatted += response

+     return formatted, response


  def check_quiz_answers(user_answers, answer_key):
+     """Grade quiz answers."""
+     if not user_answers or not user_answers.strip():
+         return "✍️ Please enter your answers above before checking."
      if not answer_key:
+         return "⚠️ Please generate a quiz first (use the panel above)."

      messages = [
+         {"role": "system", "content": "You are a bioinformatics tutor grading a quiz. Compare student answers to correct answers. For each: mark ✅ or ❌, explain briefly, provide correct answer if wrong. Be encouraging. Give final score."},
+         {"role": "user", "content": f"QUIZ AND ANSWERS:\n{answer_key}\n\nSTUDENT ANSWERS:\n{user_answers}\n\nGrade each:"}
      ]
      return llm_service.generate(messages, temperature=0.3, max_tokens=1500)

 
 
621
 
622
  def generate_lesson(topic, level, include_exercises, include_quiz):
623
+ """Generate structured lesson."""
624
  if not topic:
625
+ return "πŸ“š Please select or enter a topic."

     rag_results = rag_service.search(topic, top_k=4)
     context = ""
     if rag_results:
+        context = "Reference:\n" + "\n".join(r["text"][:500] for r in rag_results)

     prompt = LESSON_TEMPLATE.format(topic=topic, level=level)
     if include_exercises:
         prompt += "\n\nInclude 2-3 practical exercises with clear instructions."
     if include_quiz:
+        prompt += "\n\nInclude a 5-question self-assessment quiz (with answers)."

+    messages = [{"role": "system", "content": SYSTEM_PROMPTS["build_lesson"]}]
     if context:
         messages.append({"role": "system", "content": context})
     messages.append({"role": "user", "content": prompt})

     return llm_service.generate(messages, temperature=0.7, max_tokens=3000)


+# ── Module 5: Workflow Coach ──────────────────────────────────────────────────


 def workflow_respond(message, history, selected_workflow, temperature):
+    """Workflow Coach handler."""
+    if not message or not message.strip():
         yield ""
         return

+    selected_workflow = selected_workflow or ""
     workflow_context = ""
     for wf_key, wf in WORKFLOWS.items():
         if wf["name"] in selected_workflow or selected_workflow.lower() in wf["name"].lower():
             workflow_context = f"WORKFLOW REFERENCE: {wf['name']}\n\n"
             for step in wf["steps"]:
                 workflow_context += f"Step {step['step']}: {step['name']}\n"
+                workflow_context += f"  {step['description']}\n"
+                if step.get("tools"):
+                    workflow_context += f"  Tools: {', '.join(step['tools'])}\n"
                 if step.get("common_mistakes"):
+                    workflow_context += f"  ⚠️ Common mistakes: {'; '.join(step['common_mistakes'])}\n"
                 workflow_context += "\n"
             break

     rag_results = rag_service.search(message, top_k=2)
     if rag_results:
+        workflow_context += "\nADDITIONAL CONTEXT:\n" + "\n".join(r["text"][:500] for r in rag_results)

+    messages = [{"role": "system", "content": SYSTEM_PROMPTS["workflow_coach"]}]
     if workflow_context:
         messages.append({"role": "system", "content": workflow_context})
+    messages.extend(_format_history(history))
     messages.append({"role": "user", "content": message})

+    for partial in llm_service.stream_chat(messages, temperature, 1500):
         yield partial

+# ── Module 6: Paper to Lesson ─────────────────────────────────────────────────


+def paper_to_lesson_respond(message, history, output_format, rag_state):
+    """Convert papers to teaching material."""
+    if not message or not message.strip():
         yield ""
         return

+    rag_state = rag_state or {"chunks": [], "embeddings": None}
+    user_chunks = rag_state.get("chunks", [])
+    user_embeddings = rag_state.get("embeddings")
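+    # Reuses whatever the Upload & Explain tab indexed into the shared store;
+    # with nothing uploaded, the handler answers without paper context.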

     context = ""
     if user_chunks:
+        rag_results = rag_service.search(message, top_k=4, user_chunks=user_chunks, user_embeddings=user_embeddings)
         if rag_results:
+            context = "PAPER CONTENT:\n" + "\n".join(r["text"][:600] for r in rag_results)

     format_instruction = {
         "Lesson Plan": "Create a structured lesson plan with learning objectives, sections, and exercises.",
+        "Slide Outline": "Create a slide-by-slide outline with key points for each slide.",
         "Study Notes": "Create concise study notes highlighting key methods, tools, and findings.",
         "Quiz Questions": "Generate 5-10 quiz questions based on the paper's methods and findings.",
     }.get(output_format, "Create a structured lesson plan.")

+    messages = [{"role": "system", "content": SYSTEM_PROMPTS["paper_to_lesson"]}]
     if context:
         messages.append({"role": "system", "content": context})
+    messages.extend(_format_history(history))

+    full_msg = f"{message}\n\nOUTPUT FORMAT: {format_instruction}"
+    messages.append({"role": "user", "content": full_msg})

     for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=2500):
         yield partial

+# ── Module 7: Viva Practice ───────────────────────────────────────────────────


 def viva_respond(message, history, topic, difficulty):
+    """Viva practice handler."""
+    if not message or not message.strip():
         yield ""
         return

     rag_results = rag_service.search(f"{topic} {message}", top_k=3)
     context = ""
     if rag_results:
+        context = "Reference:\n" + "\n".join(r["text"][:500] for r in rag_results)

     messages = [
         {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]},
+        {"role": "system", "content": f"VIVA TOPIC: {topic}\nDIFFICULTY: {difficulty}\n\n{context}"},
     ]
+    messages.extend(_format_history(history))
     messages.append({"role": "user", "content": message})

     for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=1000):
         yield partial


 # ============================================================================
 # GRADIO APP ASSEMBLY
 # ============================================================================

 CUSTOM_CSS = """
 .main-header {
     text-align: center;
     margin-bottom: 20px;
     color: white;
 }
+.main-header h1 { color: white; font-size: 2em; margin: 0; }
+.main-header p { color: #ecf0f1; margin: 5px 0; }
 .module-info {
     background: #f0f9ff;
     border-left: 4px solid #2e86c1;
     border-radius: 0 8px 8px 0;
     font-size: 0.9em;
 }
+.status-badge {
+    display: inline-block;
+    padding: 4px 12px;
+    border-radius: 12px;
+    font-size: 0.85em;
+    font-weight: bold;
+}
+.status-on { background: #d4edda; color: #155724; }
+.status-off { background: #f8d7da; color: #721c24; }
 """


 def build_app():
+    with gr.Blocks(title="Bioinformatics with BB Tutor", css=CUSTOM_CSS) as demo:

+        # ── Global shared state ─────────────────────────────────────────
         rag_store = gr.State({"chunks": [], "embeddings": None})
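+        # A single State instance is shared by every tab, so a document
+        # uploaded in Upload & Explain stays queryable from the other modules.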

+        # ── Status indicator ────────────────────────────────────────────
+        llm_status = "🟒 AI Enabled" if llm_service.is_available() else "πŸ”΄ AI Offline (Knowledge Base Active)"
+
+        # ── Header ──────────────────────────────────────────────────────
+        gr.HTML(f"""
         <div class="main-header">
             <h1>🧬 Bioinformatics with BB Tutor</h1>
+            <p>AI-powered bioinformatics teaching assistant</p>
             <p style="font-size: 0.85em; opacity: 0.9;">
                 RNA-seq Β· Exome Β· Genome Β· Microbiome Β· Variants Β· Molecular Genetics Β· scRNA-seq Β· ATAC-seq Β· ChIP-seq Β· and more
             </p>
+            <p style="font-size: 0.8em; margin-top: 8px;">
+                <span class="status-badge {'status-on' if llm_service.is_available() else 'status-off'}">{llm_status}</span>
+            </p>
         </div>
         """)
 
 
         # ══════════════════════════════════════════════════════════════
         # TAB 1: ASK THE TUTOR
         # ══════════════════════════════════════════════════════════════
         with gr.Tab("🧬 Ask the Tutor", id="ask"):
+            gr.HTML('<div class="module-info">πŸ’‘ Ask any bioinformatics question. RAG-augmented responses from a curated knowledge base covering 15+ domains.</div>')

             gr.ChatInterface(
                 fn=tutor_respond,
                 type="messages",
                 additional_inputs=[
                     gr.Textbox(
                         value=SYSTEM_PROMPTS["ask_tutor"],
                         label="System Prompt",
+                        lines=2,
+                        visible=False,
                     ),
+                    gr.Slider(0.1, 1.5, 0.7, step=0.1, label="Temperature", visible=False),
+                    gr.Slider(256, 4096, 1024, step=256, label="Max Tokens", visible=False),
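+                    # Hidden components still pass their default values to
+                    # tutor_respond; visible=False only hides them in the UI.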
                     rag_store,
                 ],
+                additional_inputs_accordion=gr.Accordion("βš™οΈ Advanced", open=False, visible=False),
                 examples=[
+                    "What is the difference between DESeq2 and edgeR?",
+                    "Explain the GATK variant calling pipeline step by step.",
+                    "What is the difference between alpha and beta diversity?",
                     "Why should I use adjusted p-values instead of raw p-values?",
+                    "Explain the single-cell RNA-seq analysis workflow.",
+                    "What is BQSR and why is it important?",
+                    "How do I choose between STAR and HISAT2 for alignment?",
+                    "What common mistakes do students make with DESeq2?",
                 ],
             )
+            gr.HTML('<div class="safety-notice">⚠️ <strong>Educational use only.</strong> Not for clinical interpretation. Always consult qualified professionals for clinical genomics.</div>')

         # ══════════════════════════════════════════════════════════════
+        # TAB 2: UPLOAD & EXPLAIN
         # ══════════════════════════════════════════════════════════════
         with gr.Tab("πŸ“„ Upload & Explain", id="upload"):
+            gr.HTML('<div class="module-info">πŸ“„ Upload bioinformatics documents (PDF, TXT, FASTA, VCF, etc.) and get AI-powered analysis. Content is indexed and searchable across all modules.</div>')

             with gr.Row():
                 with gr.Column(scale=1):
                     file_input = gr.File(
                         label="Upload Document",
                         file_types=[".pdf", ".txt", ".md", ".csv", ".tsv",
                                     ".fasta", ".fa", ".fastq", ".vcf", ".bed",
+                                    ".gff", ".gtf", ".sam", ".bam"],
                         file_count="single",
                         type="filepath",
                     )
+                    process_btn = gr.Button("πŸ” Analyze Document", variant="primary")
+                    gr.Markdown("**Supported:** PDF, text, FASTA/FASTQ, VCF, BED, GFF/GTF, SAM/BAM, CSV/TSV")

                 with gr.Column(scale=2):
                     explanation_output = gr.Markdown(label="Analysis & Explanation")

                 outputs=[explanation_output, raw_text_output, rag_store],
             )

+            gr.Markdown("### πŸ’¬ Chat About Your Document")
             gr.ChatInterface(
                 fn=upload_chat_respond,
                 type="messages",
                 additional_inputs=[rag_store],
                 additional_inputs_accordion=gr.Accordion("", open=False, visible=False),
                 examples=[
+                    "Summarize the key methods in this paper.",
                     "What bioinformatics tools are mentioned?",
                     "Explain the main findings in simple terms.",
                     "What are the limitations of this analysis?",
890
         # ══════════════════════════════════════════════════════════════
         # TAB 3: QUIZ ME
         # ══════════════════════════════════════════════════════════════
         with gr.Tab("❓ Quiz Me", id="quiz"):
+            gr.HTML('<div class="module-info">🧠 Test your knowledge with AI-generated quizzes across all bioinformatics domains.</div>')

             with gr.Row():
+                quiz_topic = gr.Dropdown(
+                    choices=TOPIC_CHOICES,
+                    label="Select Topic",
+                    allow_custom_value=True,
+                    value="RNA-seq: Differential Expression (DESeq2)"
+                )
+                quiz_type = gr.Radio(
+                    choices=["Multiple Choice (MCQ)", "True/False", "Short Answer"],
+                    value="Multiple Choice (MCQ)",
+                    label="Format"
+                )

             with gr.Row():
+                quiz_difficulty = gr.Radio(
+                    choices=DIFFICULTY_LEVELS,
+                    value="Intermediate",
+                    label="Difficulty"
+                )
+                num_questions = gr.Slider(1, 10, 5, step=1, label="# Questions")
+                generate_quiz_btn = gr.Button("🎲 Generate Quiz", variant="primary")

             quiz_output = gr.Markdown(label="Generated Quiz")
             answer_key_state = gr.State("")
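+
+            # A sketch of the click wiring these components imply (fn and
+            # argument order are assumptions; the actual call follows here):
+            #
+            #     generate_quiz_btn.click(
+            #         fn=generate_quiz,
+            #         inputs=[quiz_topic, quiz_type, num_questions,
+            #                 quiz_difficulty, rag_store],
+            #         outputs=[quiz_output, answer_key_state],
+            #     )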
 

             gr.Markdown("---")
             gr.Markdown("### ✍️ Submit Your Answers")
+            with gr.Row():
+                user_answers = gr.Textbox(
+                    label="Your Answers (e.g., '1: A, 2: B')",
+                    lines=5,
+                    placeholder="Type your answers here...",
+                    scale=3
+                )
+                check_btn = gr.Button("βœ… Check", variant="primary", scale=1)
             feedback_output = gr.Markdown(label="Feedback")

             check_btn.click(
 
         # ══════════════════════════════════════════════════════════════
         # TAB 4: BUILD A LESSON
         # ══════════════════════════════════════════════════════════════
         with gr.Tab("πŸ“š Build a Lesson", id="lesson"):
+            gr.HTML('<div class="module-info">πŸ“š Generate structured lessons with learning objectives, explanations, exercises, and self-assessment quizzes.</div>')

             with gr.Row():
+                lesson_topic = gr.Dropdown(
+                    choices=TOPIC_CHOICES,
+                    label="Lesson Topic",
+                    allow_custom_value=True,
+                    value="RNA-seq: Differential Expression (DESeq2)"
+                )
+                lesson_level = gr.Radio(
+                    choices=DIFFICULTY_LEVELS,
+                    value="Intermediate",
+                    label="Level"
+                )

             with gr.Row():
+                include_exercises = gr.Checkbox(label="Include Exercises", value=True)
+                include_quiz = gr.Checkbox(label="Include Quiz", value=True)
+                generate_lesson_btn = gr.Button("πŸ“ Generate Lesson", variant="primary")

             lesson_output = gr.Markdown(label="Generated Lesson")

         # ══════════════════════════════════════════════════════════════
         # TAB 5: WORKFLOW COACH
         # ══════════════════════════════════════════════════════════════
         with gr.Tab("πŸ”¬ Workflow Coach", id="workflow"):
+            gr.HTML('<div class="module-info">πŸ”¬ Step-by-step guidance through bioinformatics analysis pipelines. Select a workflow and ask about any step.</div>')

             workflow_selector = gr.Dropdown(
                 choices=WORKFLOW_CHOICES,
             )

             gr.ChatInterface(
                 fn=workflow_respond,
                 type="messages",
                 additional_inputs=[
                     workflow_selector,
+                    gr.Slider(0.1, 1.5, 0.7, step=0.1, label="Temperature", visible=False),
                 ],
+                additional_inputs_accordion=gr.Accordion("βš™οΈ", open=False, visible=False),
                 examples=[
+                    "Walk me through the complete pipeline from raw FASTQ to DE results.",
+                    "I'm at alignment. What should I check before counting?",
                     "My mapping rate is only 45%. What could be wrong?",
+                    "How do I choose between STAR and HISAT2?",
+                    "What parameters for GATK HaplotypeCaller on exome data?",
+                    "How do I set DADA2 truncation parameters?",
                 ],
             )

         # ══════════════════════════════════════════════════════════════
         # TAB 6: PAPER TO LESSON
         # ══════════════════════════════════════════════════════════════
         with gr.Tab("πŸ“° Paper to Lesson", id="paper"):
+            gr.HTML('<div class="module-info">πŸ“° Convert research papers into teaching material. Upload a paper in the Upload tab first, then generate lessons, slides, or quizzes from it.</div>')

             output_format = gr.Radio(
                 choices=["Lesson Plan", "Slide Outline", "Study Notes", "Quiz Questions"],
             )

             gr.ChatInterface(
                 fn=paper_to_lesson_respond,
                 type="messages",
+                additional_inputs=[output_format, rag_store],
                 additional_inputs_accordion=gr.Accordion("", open=False, visible=False),
                 examples=[
                     "Convert this paper into a 45-minute lecture plan.",
+                    "Create a slide outline covering the key methods.",
+                    "Generate study notes on the bioinformatics methods.",
+                    "Create quiz questions on this paper's methodology.",
                 ],
             )

         # ══════════════════════════════════════════════════════════════
         # TAB 7: VIVA PRACTICE
         # ══════════════════════════════════════════════════════════════
         with gr.Tab("πŸŽ“ Viva Practice", id="viva"):
+            gr.HTML('<div class="module-info">πŸŽ“ Practice oral examinations. The AI examiner asks probing questions, evaluates your answers, and pushes deeper understanding.</div>')

             with gr.Row():
                 viva_topic = gr.Dropdown(
                     choices=TOPIC_CHOICES,
                 )
                 viva_difficulty = gr.Radio(
                     choices=DIFFICULTY_LEVELS,
                     value="Intermediate",
+                    label="Difficulty"
                 )

             gr.ChatInterface(
                 fn=viva_respond,
                 type="messages",
+                additional_inputs=[viva_topic, viva_difficulty],
+                additional_inputs_accordion=gr.Accordion("βš™οΈ", open=False, visible=False),
                 examples=[
+                    "I'm ready for my viva. Start with your first question.",
+                    "Focus on the statistical aspects of RNA-seq.",
                     "Ask me about variant calling and interpretation.",
+                    "Test my understanding of microbiome diversity.",
                 ],
             )

+        # ── Footer ──────────────────────────────────────────────────────
         gr.HTML("""
         <div style="text-align: center; padding: 20px; margin-top: 20px; border-top: 1px solid #e0e0e0; color: #666; font-size: 0.85em;">
             <p><strong>Bioinformatics with BB Tutor</strong> β€” Educational AI Assistant</p>
+            <p>⚠️ For educational purposes only. Not for clinical use.</p>
+            <p>RNA-seq Β· Exome Β· Genome Β· Microbiome Β· Variants Β· Molecular Genetics Β· scRNA-seq Β· ATAC-seq Β· ChIP-seq Β· Methylation Β· Small RNA Β· Targeted Panels Β· Long-read Β· Spatial Β· Multi-omics</p>
         </div>
         """)
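+
+# A minimal launch sketch, assuming build_app() returns the Blocks instance:
+#
+#     if __name__ == "__main__":
+#         build_app().launch()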