| """ | |
| Bioinformatics with BB Tutor β Complete Application | |
| A production bioinformatics teaching assistant with 7 modules. | |
| Architecture: | |
| - Backend: LLMService (HuggingFace InferenceClient), RAGService (sentence-transformers), | |
| DocumentParser (PyMuPDF + text), knowledge_base (domain content) | |
| - Frontend: 7 Gradio tabs with ChatInterface, file upload, quiz generation, lesson building | |
| - Data flow: User query β RAG retrieval β LLM with context β streaming response | |
| - Shared state: rag_store (gr.State) holds uploaded document chunks + embeddings across tabs | |
| """ | |
import gradio as gr
import numpy as np
import os
from pathlib import Path

# ── Conditional imports with fallbacks ────────────────────────────────────────
try:
    import fitz  # PyMuPDF
    HAS_FITZ = True
except ImportError:
    HAS_FITZ = False
    print("Warning: PyMuPDF not available. PDF parsing disabled.")

try:
    from sentence_transformers import SentenceTransformer
    HAS_ST = True
except ImportError:
    HAS_ST = False
    print("Warning: sentence-transformers not available. Embedding search disabled.")

try:
    from huggingface_hub import InferenceClient
    HAS_HF = True
except ImportError:
    HAS_HF = False
    print("Warning: huggingface_hub not available. LLM service disabled.")

# ── Import knowledge base ─────────────────────────────────────────────────────
from knowledge_base import (
    DOMAIN_TAXONOMY, WORKFLOWS, GLOSSARY, COMMON_MISCONCEPTIONS,
    SYSTEM_PROMPTS, QUIZ_TEMPLATES, LESSON_TEMPLATE,
    TOPIC_CHOICES, DIFFICULTY_LEVELS, WORKFLOW_CHOICES
)

# ============================================================================
# CONFIGURATION
# ============================================================================
LLM_MODEL = os.environ.get("LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
HF_TOKEN = os.environ.get("HF_TOKEN", None)
CHUNK_SIZE = 400
CHUNK_OVERLAP = 60
TOP_K_RETRIEVAL = 3
DEFAULT_SYSTEM_PROMPT = SYSTEM_PROMPTS["ask_tutor"]
DEFAULT_RAG_STATE = {"chunks": [], "embeddings": None}
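# rag_store schema shared across tabs (held in gr.State below):
#   {"chunks": list[str], "embeddings": np.ndarray | None}
# where embeddings row i is the normalized vector for chunks[i].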

# ============================================================================
# BACKEND SERVICES – Singleton Pattern
# ============================================================================
class LLMService:
    """Lazy-initialized LLM inference service."""
    _instance = None
    _initialized = False
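    # Classic singleton: __new__ always hands back the one shared instance,
    # and the class-level _initialized flag keeps __init__ from re-running
    # its setup when the constructor is called again.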
    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        if LLMService._initialized:
            return
        LLMService._initialized = True
        self.client = None
        self._try_init()

    def _try_init(self):
        if not HAS_HF:
            print("LLMService: huggingface_hub not available")
            return
        if not HF_TOKEN:
            print("LLMService: HF_TOKEN not set in environment")
            return
        try:
            self.client = InferenceClient(
                model=LLM_MODEL,
                token=HF_TOKEN,
                timeout=120,
            )
            print("LLMService: Initialized successfully")
        except Exception as e:
            print(f"LLMService: Failed to initialize: {e}")
            self.client = None

    def is_available(self):
        return self.client is not None

    def stream_chat(self, messages, temperature=0.7, max_tokens=1024):
        """Stream chat completion. Yields partial response strings."""
        if not self.is_available():
            yield self._fallback_response(messages)
            return
        try:
            partial = ""
            for chunk in self.client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=0.9,
                stream=True,
            ):
                token = ""
                if hasattr(chunk, 'choices') and chunk.choices:
                    choice = chunk.choices[0]
                    if hasattr(choice, 'delta') and hasattr(choice.delta, 'content'):
                        token = choice.delta.content or ""
                partial += token
                yield partial
        except Exception as e:
            print(f"LLM stream error: {e}")
            yield (
                f"⚠️ LLM API error: {str(e)}\n\n"
                f"Please check your HF_TOKEN in Space settings and ensure the model "
                f"'{LLM_MODEL}' is accessible.\n\n"
                "The tutor is still functional using its knowledge base for many "
                "questions – try asking about specific bioinformatics topics!"
            )

    def generate(self, messages, temperature=0.7, max_tokens=1024):
        """Non-streaming generation. Returns complete response."""
        if not self.is_available():
            return self._fallback_response(messages)
        try:
            response = self.client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=0.9,
                stream=False,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"LLM generate error: {e}")
            return (
                f"⚠️ LLM API error: {str(e)}\n\n"
                "The tutor can still answer from its knowledge base. "
                "Try asking about specific concepts!"
            )

    def _fallback_response(self, messages):
        """Knowledge-base fallback when LLM unavailable."""
        user_msg = ""
        for m in reversed(messages):
            if isinstance(m, dict) and m.get("role") == "user":
                user_msg = m.get("content", "").lower()
                break
        if not user_msg:
            return (
                "⚠️ **LLM not available.** Add HF_TOKEN in Space settings to enable AI responses.\n\n"
                "Meanwhile, the knowledge base covers: DESeq2, variant calling, microbiome "
                "diversity, scRNA-seq clustering, and more. Try asking a specific question!"
            )
        response_parts = []
        for term, definition in GLOSSARY.items():
            if term.lower() in user_msg:
                response_parts.append(f"**{term}**: {definition}")
                if len(response_parts) >= 3:
                    break
        for wf_key, wf in WORKFLOWS.items():
            if any(kw in user_msg for kw in wf["name"].lower().split()):
                response_parts.append(f"\n### {wf['name']}")
                for step in wf["steps"][:3]:
                    response_parts.append(f"**Step {step['step']}: {step['name']}**\n{step['description']}")
                break
        for misc in COMMON_MISCONCEPTIONS:
            if misc["domain"].replace("_", " ") in user_msg or any(w in user_msg for w in misc["misconception"].lower().split()[:5]):
                response_parts.append(
                    f"\n⚠️ **Common Misconception**: {misc['misconception']}\n\n"
                    f"✅ **Correction**: {misc['correction']}"
                )
                break
        if response_parts:
            return "📚 *Responding from knowledge base (LLM not configured):*\n\n" + "\n\n".join(response_parts)
        return (
            "⚠️ **AI responses require HF_TOKEN.**\n\n"
            "To enable full AI-powered responses:\n"
            "1. Go to your HuggingFace account → Settings → Access Tokens\n"
            "2. Create a token with 'inference-api' scope\n"
            "3. Add it as a Secret named `HF_TOKEN` in this Space's Settings\n\n"
            "The knowledge base can still answer many questions. Try asking about "
            "'RNA-seq workflow', 'variant calling', or 'microbiome diversity'!"
        )
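
# Usage sketch (illustrative; messages use the OpenAI-style chat format):
#   svc = LLMService()
#   for partial in svc.stream_chat([{"role": "user", "content": "What is FDR?"}]):
#       ...  # each yield is the accumulated response so far, not a single token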

class RAGService:
    """Document retrieval with lazy embedding model loading."""
    _instance = None
    _initialized = False

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        if RAGService._initialized:
            return
        RAGService._initialized = True
        self.embedder = None
        self.kb_chunks = []
        self.kb_metadata = []
        self.kb_embeddings = None
        self._build_kb_index()

    def _ensure_embedder(self):
        if self.embedder is not None:
            return True
        if not HAS_ST:
            return False
        try:
            print("RAGService: Loading embedding model...")
            self.embedder = SentenceTransformer(EMBED_MODEL)
            print("RAGService: Embedding model loaded")
            if self.kb_chunks:
                self.kb_embeddings = self.embedder.encode(
                    self.kb_chunks,
                    convert_to_numpy=True,
                    normalize_embeddings=True,
                    show_progress_bar=False,
                    batch_size=32,
                )
                print(f"RAGService: KB embedded ({len(self.kb_chunks)} chunks)")
            return True
        except Exception as e:
            print(f"RAGService: Failed to load embedder: {e}")
            return False

    def _build_kb_index(self):
        chunks = []
        metadata = []
        for term, definition in GLOSSARY.items():
            chunks.append(f"{term}: {definition}")
            metadata.append({"source": "glossary", "topic": term, "type": "definition"})
        for wf_key, wf in WORKFLOWS.items():
            for step in wf["steps"]:
                step_text = f"{wf['name']} - Step {step['step']}: {step['name']}. {step['description']}"
                if step.get("tools"):
                    step_text += f" Tools: {', '.join(step['tools'])}."
                if step.get("common_mistakes"):
                    step_text += " Common mistakes: " + "; ".join(step["common_mistakes"])
                chunks.append(step_text)
                metadata.append({
                    "source": "workflow",
                    "topic": wf["domain"],
                    "type": "workflow_step",
                    "step": step["step"],
                    "workflow": wf_key
                })
        for misc in COMMON_MISCONCEPTIONS:
            text = f"Misconception: {misc['misconception']} Correction: {misc['correction']}"
            chunks.append(text)
            metadata.append({
                "source": "misconception",
                "topic": misc["domain"],
                "type": "misconception",
                "severity": misc["severity"]
            })
        for key, domain in DOMAIN_TAXONOMY.items():
            text = f"{domain['name']} covers: {', '.join(domain['subtopics'][:10])}"
            chunks.append(text)
            metadata.append({"source": "taxonomy", "topic": key, "type": "domain_overview"})
        self.kb_chunks = chunks
        self.kb_metadata = metadata
        print(f"RAGService: Built KB with {len(chunks)} chunks")

    def search(self, query, top_k=TOP_K_RETRIEVAL, user_chunks=None, user_embeddings=None):
        if not self._ensure_embedder():
            return self._keyword_search(query, top_k)
        try:
            query_embedding = self.embedder.encode(
                [query],
                convert_to_numpy=True,
                normalize_embeddings=True,
            )
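            # All vectors are L2-normalized at encode time, so the dot products
            # below are exactly cosine similarities; 0.15 acts as a loose
            # relevance floor that drops weak matches.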
            results = []
            if self.kb_embeddings is not None and len(self.kb_embeddings) > 0:
                kb_scores = np.dot(query_embedding, self.kb_embeddings.T)[0]
                top_indices = np.argsort(kb_scores)[::-1][:top_k]
                for idx in top_indices:
                    if kb_scores[idx] > 0.15:
                        results.append({
                            "text": self.kb_chunks[idx],
                            "score": float(kb_scores[idx]),
                            "metadata": self.kb_metadata[idx]
                        })
            if user_chunks and user_embeddings is not None and len(user_embeddings) > 0:
                user_scores = np.dot(query_embedding, user_embeddings.T)[0]
                top_user = np.argsort(user_scores)[::-1][:top_k]
                for idx in top_user:
                    if user_scores[idx] > 0.15:
                        results.append({
                            "text": user_chunks[idx],
                            "score": float(user_scores[idx]),
                            "metadata": {"source": "uploaded", "type": "user_content"}
                        })
            results.sort(key=lambda x: x["score"], reverse=True)
            return results[:top_k]
        except Exception as e:
            print(f"RAG search error: {e}")
            return self._keyword_search(query, top_k)

    def _keyword_search(self, query, top_k=3):
        query_words = set(query.lower().split())
        scored = []
        for i, chunk in enumerate(self.kb_chunks):
            chunk_words = set(chunk.lower().split())
            overlap = len(query_words & chunk_words)
            if overlap > 0:
                scored.append({
                    "text": chunk,
                    "score": overlap / max(len(query_words), 1),
                    "metadata": self.kb_metadata[i]
                })
        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:top_k]

    def embed_chunks(self, chunks):
        if not self._ensure_embedder() or not chunks:
            return None
        try:
            return self.embedder.encode(
                chunks,
                convert_to_numpy=True,
                normalize_embeddings=True,
                show_progress_bar=False,
                batch_size=16,
            )
        except Exception as e:
            print(f"Embed chunks error: {e}")
            return None

class DocumentParser:

    @staticmethod
    def parse_file(filepath):
        if filepath is None:
            return "", []
        filepath = str(filepath)
        ext = Path(filepath).suffix.lower()
        try:
            if ext == ".pdf" and HAS_FITZ:
                return DocumentParser._parse_pdf(filepath)
            elif ext in (".txt", ".md", ".csv", ".tsv", ".fasta", ".fa", ".fastq",
                         ".fq", ".vcf", ".bed", ".gff", ".gtf", ".sam", ".bam"):
                return DocumentParser._parse_text(filepath)
            else:
                return f"Unsupported file type: {ext}", []
        except Exception as e:
            return f"Error parsing file: {str(e)}", []

    @staticmethod
    def _parse_pdf(filepath):
        doc = fitz.open(filepath)
        pages = []
        for page_num in range(len(doc)):
            text = doc[page_num].get_text()
            if text.strip():
                pages.append(text)
        doc.close()
        full_text = "\n\n".join(pages)
        chunks = DocumentParser._chunk_text(full_text)
        return full_text, chunks

    @staticmethod
    def _parse_text(filepath):
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()
        chunks = DocumentParser._chunk_text(text)
        return text, chunks

    @staticmethod
    def _chunk_text(text, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP):
        words = text.split()
        if len(words) <= chunk_size:
            return [text] if text.strip() else []
        chunks = []
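        # Sliding window: advance by (chunk_size - overlap) words per step so
        # consecutive chunks share `overlap` words of context; with the module
        # defaults (400/60) the stride is 340 words.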
        for i in range(0, len(words), chunk_size - overlap):
            chunk = " ".join(words[i:i + chunk_size])
            if chunk.strip():
                chunks.append(chunk)
        return chunks

llm_service = LLMService()
rag_service = RAGService()
doc_parser = DocumentParser()
print(f"🧬 BB Tutor initialized. LLM: {llm_service.is_available()}, Embeddings: {rag_service.embedder is not None}")

# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def _rag_context(query, user_chunks=None, user_embeddings=None):
    results = rag_service.search(query, top_k=TOP_K_RETRIEVAL,
                                 user_chunks=user_chunks, user_embeddings=user_embeddings)
    if not results:
        return ""
    parts = ["RELEVANT KNOWLEDGE BASE CONTEXT:"]
    for r in results:
        source = r["metadata"].get("source", "kb")
        parts.append(f"[{source}] {r['text'][:800]}")
    return "\n".join(parts)

def _format_history(history):
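    # Gradio can deliver chat history either as OpenAI-style dicts
    # ({"role": ..., "content": ...}) or as legacy (user, assistant) pairs;
    # normalize both into the dict message format the LLM client expects.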
    messages = []
    for h in history:
        if isinstance(h, dict):
            messages.append(h)
        elif isinstance(h, (list, tuple)):
            if len(h) >= 1 and h[0]:
                messages.append({"role": "user", "content": str(h[0])})
            if len(h) >= 2 and h[1]:
                messages.append({"role": "assistant", "content": str(h[1])})
    return messages

# ============================================================================
# MODULE HANDLERS
# ============================================================================
def tutor_respond(message, history, system_prompt, temperature, max_tokens, rag_state):
    if not message or not message.strip():
        yield ""
        return
    rag_state = rag_state or DEFAULT_RAG_STATE
    user_chunks = rag_state.get("chunks", [])
    user_embeddings = rag_state.get("embeddings")
    rag_ctx = _rag_context(message, user_chunks, user_embeddings)
    messages = [{"role": "system", "content": system_prompt}]
    if rag_ctx:
        messages.append({"role": "system", "content": rag_ctx})
    messages.extend(_format_history(history))
    messages.append({"role": "user", "content": message})
    for partial in llm_service.stream_chat(messages, temperature, max_tokens):
        yield partial

def process_upload(file, rag_state):
    rag_state = rag_state or DEFAULT_RAG_STATE
    if file is None:
        return "📄 Please upload a file first.", "", rag_state
    full_text, chunks = doc_parser.parse_file(file)
    if not chunks:
        return "⚠️ Could not extract text from the uploaded file.", full_text[:2000] if full_text else "", rag_state
    embeddings = rag_service.embed_chunks(chunks)
    new_state = {"chunks": chunks, "embeddings": embeddings}
    preview = full_text[:2500]
    msgs = [
        {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
        {"role": "user", "content": f"Analyze and explain this bioinformatics document:\n\n{preview}"}
    ]
    explanation = llm_service.generate(msgs, temperature=0.5, max_tokens=1500)
    stats = f"📊 **Document Stats:** {len(chunks)} chunks, ~{len(full_text.split())} words | "
    stats += f"File type: {Path(str(file)).suffix} | "
    stats += "🤖 AI-powered" if llm_service.is_available() else "📚 Knowledge-base mode"
    stats += "\n\n---\n\n"
    return stats + explanation, full_text[:5000], new_state

def upload_chat_respond(message, history, rag_state):
    if not message or not message.strip():
        yield ""
        return
    rag_state = rag_state or DEFAULT_RAG_STATE
    user_chunks = rag_state.get("chunks", [])
    user_embeddings = rag_state.get("embeddings")
    if not user_chunks:
        yield ("📄 Please upload a document in the panel above, then ask questions about it.\n\n"
               "Your uploaded document will be indexed and searchable across all modules!")
        return
    rag_results = rag_service.search(message, top_k=4, user_chunks=user_chunks, user_embeddings=user_embeddings)
    ctx = "CONTEXT FROM UPLOADED DOCUMENT:\n"
    if rag_results:
        for r in rag_results:
            ctx += f"\n{r['text'][:600]}\n"
    else:
        ctx += "(No highly relevant passages found – answering from general knowledge)\n"
    messages = [
        {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
        {"role": "system", "content": ctx},
    ]
    messages.extend(_format_history(history))
    messages.append({"role": "user", "content": message})
    for partial in llm_service.stream_chat(messages, temperature=0.5, max_tokens=1024):
        yield partial

def generate_quiz(topic, quiz_type, num_questions, difficulty, rag_state):
    if not topic:
        return "❌ Please select or enter a topic first.", ""
    rag_results = rag_service.search(topic, top_k=3)
    context = ""
    if rag_results:
        context = "Reference material:\n" + "\n".join(r["text"][:500] for r in rag_results)
    template_key = {
        "Multiple Choice (MCQ)": "mcq",
        "True/False": "true_false",
        "Short Answer": "short_answer"
    }.get(quiz_type, "mcq")
    quiz_prompt = QUIZ_TEMPLATES[template_key].format(
        n=int(num_questions), topic=topic, difficulty=difficulty
    )
    messages = [{"role": "system", "content": SYSTEM_PROMPTS["quiz_me"]}]
    if context:
        messages.append({"role": "system", "content": context})
    messages.append({"role": "user", "content": quiz_prompt})
    response = llm_service.generate(messages, temperature=0.8, max_tokens=2000)
    formatted = f"## 🧠 {topic} Quiz – {difficulty}\n\n"
    formatted += f"*Format: {quiz_type} | Questions: {int(num_questions)}*\n\n---\n\n"
    formatted += response
    return formatted, response

def check_quiz_answers(user_answers, answer_key):
    if not user_answers or not user_answers.strip():
        return "✏️ Please enter your answers above before checking."
    if not answer_key:
        return "⚠️ Please generate a quiz first (use the panel above)."
    messages = [
        {"role": "system", "content": (
            "You are a bioinformatics tutor grading a quiz. Compare student answers "
            "to the correct answers. For each: mark ✓ or ✗, explain briefly, and give "
            "the correct answer if wrong. Be encouraging. Give a final score."
        )},
        {"role": "user", "content": f"QUIZ AND ANSWERS:\n{answer_key}\n\nSTUDENT ANSWERS:\n{user_answers}\n\nGrade each:"}
    ]
    return llm_service.generate(messages, temperature=0.3, max_tokens=1500)

def generate_lesson(topic, level, include_exercises, include_quiz):
    if not topic:
        return "📚 Please select or enter a topic."
    rag_results = rag_service.search(topic, top_k=4)
    context = ""
    if rag_results:
        context = "Reference:\n" + "\n".join(r["text"][:500] for r in rag_results)
    prompt = LESSON_TEMPLATE.format(topic=topic, level=level)
    if include_exercises:
        prompt += "\n\nInclude 2-3 practical exercises with clear instructions."
    if include_quiz:
        prompt += "\n\nInclude a 5-question self-assessment quiz (with answers)."
    messages = [{"role": "system", "content": SYSTEM_PROMPTS["build_lesson"]}]
    if context:
        messages.append({"role": "system", "content": context})
    messages.append({"role": "user", "content": prompt})
    return llm_service.generate(messages, temperature=0.7, max_tokens=3000)

def workflow_respond(message, history, selected_workflow, temperature):
    if not message or not message.strip():
        yield ""
        return
    workflow_context = ""
    for wf_key, wf in WORKFLOWS.items():
        if wf["name"] in selected_workflow or selected_workflow.lower() in wf["name"].lower():
            workflow_context = f"WORKFLOW REFERENCE: {wf['name']}\n\n"
            for step in wf["steps"]:
                workflow_context += f"Step {step['step']}: {step['name']}\n"
                workflow_context += f"  {step['description']}\n"
                if step.get("tools"):
                    workflow_context += f"  Tools: {', '.join(step['tools'])}\n"
                if step.get("common_mistakes"):
                    workflow_context += f"  ⚠️ Common mistakes: {'; '.join(step['common_mistakes'])}\n"
                workflow_context += "\n"
            break
    rag_results = rag_service.search(message, top_k=2)
    if rag_results:
        workflow_context += "\nADDITIONAL CONTEXT:\n" + "\n".join(r["text"][:500] for r in rag_results)
    messages = [{"role": "system", "content": SYSTEM_PROMPTS["workflow_coach"]}]
    if workflow_context:
        messages.append({"role": "system", "content": workflow_context})
    messages.extend(_format_history(history))
    messages.append({"role": "user", "content": message})
    for partial in llm_service.stream_chat(messages, temperature, 1500):
        yield partial

def paper_to_lesson_respond(message, history, output_format, rag_state):
    if not message or not message.strip():
        yield ""
        return
    rag_state = rag_state or DEFAULT_RAG_STATE
    user_chunks = rag_state.get("chunks", [])
    user_embeddings = rag_state.get("embeddings")
    context = ""
    if user_chunks:
        rag_results = rag_service.search(message, top_k=4, user_chunks=user_chunks, user_embeddings=user_embeddings)
        if rag_results:
            context = "PAPER CONTENT:\n" + "\n".join(r["text"][:600] for r in rag_results)
    format_instruction = {
        "Lesson Plan": "Create a structured lesson plan with learning objectives, sections, and exercises.",
        "Slide Outline": "Create a slide-by-slide outline with key points for each slide.",
        "Study Notes": "Create concise study notes highlighting key methods, tools, and findings.",
        "Quiz Questions": "Generate 5-10 quiz questions based on the paper's methods and findings.",
    }.get(output_format, "Create a structured lesson plan.")
    messages = [{"role": "system", "content": SYSTEM_PROMPTS["paper_to_lesson"]}]
    if context:
        messages.append({"role": "system", "content": context})
    messages.extend(_format_history(history))
    full_msg = f"{message}\n\nOUTPUT FORMAT: {format_instruction}"
    messages.append({"role": "user", "content": full_msg})
    for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=2500):
        yield partial

def viva_respond(message, history, topic, difficulty):
    if not message or not message.strip():
        yield ""
        return
    rag_results = rag_service.search(f"{topic} {message}", top_k=3)
    context = ""
    if rag_results:
        context = "Reference:\n" + "\n".join(r["text"][:500] for r in rag_results)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]},
        {"role": "system", "content": f"VIVA TOPIC: {topic}\nDIFFICULTY: {difficulty}\n\n{context}"},
    ]
    messages.extend(_format_history(history))
    messages.append({"role": "user", "content": message})
    for partial in llm_service.stream_chat(messages, temperature=0.7, max_tokens=1000):
        yield partial

# ============================================================================
# GRADIO APP ASSEMBLY
# ============================================================================
CUSTOM_CSS = """
.main-header {
    text-align: center; padding: 20px;
    background: linear-gradient(135deg, #1a5276 0%, #2e86c1 50%, #48c9b0 100%);
    border-radius: 12px; margin-bottom: 20px; color: white;
}
.main-header h1 { color: white; font-size: 2em; margin: 0; }
.main-header p { color: #ecf0f1; margin: 5px 0; }
.module-info {
    background: #f0f9ff; border-left: 4px solid #2e86c1;
    padding: 12px 16px; margin-bottom: 16px; border-radius: 0 8px 8px 0;
}
.safety-notice {
    background: #fff3e0; border-left: 4px solid #f39c12;
    padding: 10px 14px; margin-top: 10px; border-radius: 0 8px 8px 0; font-size: 0.9em;
}
.status-badge {
    display: inline-block; padding: 4px 12px; border-radius: 12px;
    font-size: 0.85em; font-weight: bold;
}
.status-on { background: #d4edda; color: #155724; }
.status-off { background: #f8d7da; color: #721c24; }
"""

def build_app():
    with gr.Blocks(title="Bioinformatics with BB Tutor", css=CUSTOM_CSS) as demo:
        # ── Global shared state ─────────────────────────────────────────
        rag_store = gr.State(DEFAULT_RAG_STATE)
        # ── Status indicator ────────────────────────────────────────────
        llm_status = "🟢 AI Enabled" if llm_service.is_available() else "🔴 AI Offline (Knowledge Base Active)"
        # ── Header ──────────────────────────────────────────────────────
        gr.HTML(f"""
        <div class="main-header">
            <h1>🧬 Bioinformatics with BB Tutor</h1>
            <p>AI-powered bioinformatics teaching assistant</p>
            <p style="font-size: 0.85em; opacity: 0.9;">
                RNA-seq · Exome · Genome · Microbiome · Variants · Molecular Genetics · scRNA-seq · ATAC-seq · ChIP-seq · and more
            </p>
            <p style="font-size: 0.8em; margin-top: 8px;">
                <span class="status-badge {'status-on' if llm_service.is_available() else 'status-off'}">{llm_status}</span>
            </p>
        </div>
        """)
        with gr.Tabs():
            # ──────────────────────────────────────────────────────────────
            # TAB 1: ASK THE TUTOR
            # ──────────────────────────────────────────────────────────────
            with gr.Tab("🧬 Ask the Tutor", id="ask"):
                gr.HTML('<div class="module-info">💡 Ask any bioinformatics question. RAG-augmented responses from a curated knowledge base covering 15+ domains.</div>')
                # Examples must be list-of-lists matching the fn signature:
                # (message, history, system_prompt, temperature, max_tokens, rag_state).
                # Additional inputs are [system_prompt, temperature, max_tokens, rag_store]
                # → 4 values per example after the message.
                ask_examples = [
                    ["What is the difference between DESeq2 and edgeR?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["Explain the GATK variant calling pipeline step by step.", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["What is the difference between alpha and beta diversity?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["Why should I use adjusted p-values instead of raw p-values?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["Explain the single-cell RNA-seq analysis workflow.", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["What is BQSR and why is it important?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["How do I choose between STAR and HISAT2 for alignment?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                    ["What common mistakes do students make with DESeq2?", DEFAULT_SYSTEM_PROMPT, 0.7, 1024, DEFAULT_RAG_STATE],
                ]
                gr.ChatInterface(
                    fn=tutor_respond,
                    type="messages",
                    additional_inputs=[
                        gr.Textbox(value=DEFAULT_SYSTEM_PROMPT, label="System Prompt", lines=2, visible=False),
                        gr.Slider(0.1, 1.5, 0.7, step=0.1, label="Temperature", visible=False),
                        gr.Slider(256, 4096, 1024, step=256, label="Max Tokens", visible=False),
                        rag_store,
                    ],
                    additional_inputs_accordion=gr.Accordion("⚙️ Advanced", open=False, visible=False),
                    examples=ask_examples,
                )
                gr.HTML('<div class="safety-notice">⚠️ <strong>Educational use only.</strong> Not for clinical interpretation. Always consult qualified professionals for clinical genomics.</div>')
            # ──────────────────────────────────────────────────────────────
            # TAB 2: UPLOAD & EXPLAIN
            # ──────────────────────────────────────────────────────────────
            with gr.Tab("📄 Upload & Explain", id="upload"):
                gr.HTML('<div class="module-info">📄 Upload bioinformatics documents (PDF, TXT, FASTA, VCF, etc.) and get AI-powered analysis. Content is indexed and searchable across all modules.</div>')
                with gr.Row():
                    with gr.Column(scale=1):
                        file_input = gr.File(
                            label="Upload Document",
                            file_types=[".pdf", ".txt", ".md", ".csv", ".tsv",
                                        ".fasta", ".fa", ".fastq", ".vcf", ".bed",
                                        ".gff", ".gtf", ".sam", ".bam"],
                            file_count="single", type="filepath",
                        )
                        process_btn = gr.Button("🔍 Analyze Document", variant="primary")
                        gr.Markdown("**Supported:** PDF, text, FASTA/FASTQ, VCF, BED, GFF/GTF, SAM/BAM, CSV/TSV")
                    with gr.Column(scale=2):
                        explanation_output = gr.Markdown(label="Analysis & Explanation")
                        with gr.Accordion("📄 Raw Extracted Text", open=False):
                            raw_text_output = gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True)
                process_btn.click(
                    fn=process_upload,
                    inputs=[file_input, rag_store],
                    outputs=[explanation_output, raw_text_output, rag_store],
                )
                gr.Markdown("### 💬 Chat About Your Document")
                # fn signature: (message, history, rag_state) → 1 additional input
                upload_chat_examples = [
                    ["Summarize the key methods in this paper.", DEFAULT_RAG_STATE],
                    ["What bioinformatics tools are mentioned?", DEFAULT_RAG_STATE],
                    ["Explain the main findings in simple terms.", DEFAULT_RAG_STATE],
                    ["What are the limitations of this analysis?", DEFAULT_RAG_STATE],
                ]
                gr.ChatInterface(
                    fn=upload_chat_respond,
                    type="messages",
                    additional_inputs=[rag_store],
                    examples=upload_chat_examples,
                )
            # ──────────────────────────────────────────────────────────────
            # TAB 3: QUIZ ME
            # ──────────────────────────────────────────────────────────────
            with gr.Tab("❓ Quiz Me", id="quiz"):
                gr.HTML('<div class="module-info">🧠 Test your knowledge with AI-generated quizzes across all bioinformatics domains.</div>')
                with gr.Row():
                    quiz_topic = gr.Dropdown(
                        choices=TOPIC_CHOICES, label="Select Topic",
                        allow_custom_value=True, value="RNA-seq: Differential Expression (DESeq2)"
                    )
                    quiz_type = gr.Radio(
                        choices=["Multiple Choice (MCQ)", "True/False", "Short Answer"],
                        value="Multiple Choice (MCQ)", label="Format"
                    )
                with gr.Row():
                    quiz_difficulty = gr.Radio(
                        choices=DIFFICULTY_LEVELS, value="Intermediate", label="Difficulty"
                    )
                    num_questions = gr.Slider(1, 10, 5, step=1, label="# Questions")
                generate_quiz_btn = gr.Button("🎲 Generate Quiz", variant="primary")
                quiz_output = gr.Markdown(label="Generated Quiz")
                answer_key_state = gr.State("")
                generate_quiz_btn.click(
                    fn=generate_quiz,
                    inputs=[quiz_topic, quiz_type, num_questions, quiz_difficulty, rag_store],
                    outputs=[quiz_output, answer_key_state],
                )
                gr.Markdown("---")
                gr.Markdown("### ✏️ Submit Your Answers")
                with gr.Row():
                    user_answers = gr.Textbox(
                        label="Your Answers (e.g., '1: A, 2: B')",
                        lines=5, placeholder="Type your answers here...", scale=3
                    )
                    check_btn = gr.Button("✅ Check", variant="primary", scale=1)
                feedback_output = gr.Markdown(label="Feedback")
                check_btn.click(
                    fn=check_quiz_answers,
                    inputs=[user_answers, answer_key_state],
                    outputs=[feedback_output],
                )
            # ──────────────────────────────────────────────────────────────
            # TAB 4: BUILD A LESSON
            # ──────────────────────────────────────────────────────────────
            with gr.Tab("📚 Build a Lesson", id="lesson"):
                gr.HTML('<div class="module-info">📚 Generate structured lessons with learning objectives, explanations, exercises, and self-assessment quizzes.</div>')
                with gr.Row():
                    lesson_topic = gr.Dropdown(
                        choices=TOPIC_CHOICES, label="Lesson Topic",
                        allow_custom_value=True, value="RNA-seq: Differential Expression (DESeq2)"
                    )
                    lesson_level = gr.Radio(
                        choices=DIFFICULTY_LEVELS, value="Intermediate", label="Level"
                    )
                with gr.Row():
                    include_exercises = gr.Checkbox(label="Include Exercises", value=True)
                    include_quiz = gr.Checkbox(label="Include Quiz", value=True)
                generate_lesson_btn = gr.Button("📚 Generate Lesson", variant="primary")
                lesson_output = gr.Markdown(label="Generated Lesson")
                generate_lesson_btn.click(
                    fn=generate_lesson,
                    inputs=[lesson_topic, lesson_level, include_exercises, include_quiz],
                    outputs=[lesson_output],
                )
            # ──────────────────────────────────────────────────────────────
            # TAB 5: WORKFLOW COACH
            # ──────────────────────────────────────────────────────────────
            with gr.Tab("🔬 Workflow Coach", id="workflow"):
                gr.HTML('<div class="module-info">🔬 Step-by-step guidance through bioinformatics analysis pipelines. Select a workflow and ask about any step.</div>')
                workflow_selector = gr.Dropdown(
                    choices=WORKFLOW_CHOICES, label="Select Workflow",
                    value="Bulk RNA-seq: Full DE Analysis Pipeline", allow_custom_value=True,
                )
                # fn signature: (message, history, selected_workflow, temperature) → 2 additional inputs
                workflow_examples = [
                    ["Walk me through the complete pipeline from raw FASTQ to DE results.", "Bulk RNA-seq: Full DE Analysis Pipeline", 0.7],
                    ["I'm at alignment. What should I check before counting?", "Bulk RNA-seq: Full DE Analysis Pipeline", 0.7],
                    ["My mapping rate is only 45%. What could be wrong?", "Bulk RNA-seq: Full DE Analysis Pipeline", 0.7],
                    ["How do I choose between STAR and HISAT2?", "Bulk RNA-seq: Full DE Analysis Pipeline", 0.7],
                    ["What parameters for GATK HaplotypeCaller on exome data?", "Exome Sequencing: Variant Calling Pipeline", 0.7],
                    ["How do I set DADA2 truncation parameters?", "Microbiome: 16S Amplicon Analysis (QIIME2)", 0.7],
                ]
                gr.ChatInterface(
                    fn=workflow_respond,
                    type="messages",
                    additional_inputs=[
                        workflow_selector,
                        gr.Slider(0.1, 1.5, 0.7, step=0.1, label="Temperature", visible=False),
                    ],
                    additional_inputs_accordion=gr.Accordion("⚙️", open=False, visible=False),
                    examples=workflow_examples,
                )
            # ──────────────────────────────────────────────────────────────
            # TAB 6: PAPER TO LESSON
            # ──────────────────────────────────────────────────────────────
            with gr.Tab("📰 Paper to Lesson", id="paper"):
                gr.HTML('<div class="module-info">📰 Convert research papers into teaching material. Upload a paper in the Upload tab first, then generate lessons, slides, or quizzes from it.</div>')
                output_format = gr.Radio(
                    choices=["Lesson Plan", "Slide Outline", "Study Notes", "Quiz Questions"],
                    value="Lesson Plan", label="Output Format"
                )
                # fn signature: (message, history, output_format, rag_state) → 2 additional inputs
                paper_examples = [
                    ["Convert this paper into a 45-minute lecture plan.", "Lesson Plan", DEFAULT_RAG_STATE],
                    ["Create a slide outline covering the key methods.", "Slide Outline", DEFAULT_RAG_STATE],
                    ["Generate study notes on the bioinformatics methods.", "Study Notes", DEFAULT_RAG_STATE],
                    ["Create quiz questions on this paper's methodology.", "Quiz Questions", DEFAULT_RAG_STATE],
                ]
                gr.ChatInterface(
                    fn=paper_to_lesson_respond,
                    type="messages",
                    additional_inputs=[output_format, rag_store],
                    examples=paper_examples,
                )
            # ──────────────────────────────────────────────────────────────
            # TAB 7: VIVA PRACTICE
            # ──────────────────────────────────────────────────────────────
            with gr.Tab("🎓 Viva Practice", id="viva"):
                gr.HTML('<div class="module-info">🎓 Practice oral examinations. The AI examiner asks probing questions, evaluates your answers, and pushes you toward deeper understanding.</div>')
                with gr.Row():
                    viva_topic = gr.Dropdown(
                        choices=TOPIC_CHOICES, label="Viva Topic",
                        allow_custom_value=True, value="RNA-seq: Differential Expression (DESeq2)"
                    )
                    viva_difficulty = gr.Radio(
                        choices=DIFFICULTY_LEVELS, value="Intermediate", label="Difficulty"
                    )
                # fn signature: (message, history, topic, difficulty) → 2 additional inputs
                viva_examples = [
                    ["I'm ready for my viva. Start with your first question.", "RNA-seq: Differential Expression (DESeq2)", "Intermediate"],
                    ["Focus on the statistical aspects of RNA-seq.", "RNA-seq: Differential Expression (DESeq2)", "Intermediate"],
                    ["Ask me about variant calling and interpretation.", "Variant Interpretation: ACMG Guidelines", "Intermediate"],
                    ["Test my understanding of microbiome diversity.", "Microbiome: Alpha & Beta Diversity", "Intermediate"],
                ]
                gr.ChatInterface(
                    fn=viva_respond,
                    type="messages",
                    additional_inputs=[viva_topic, viva_difficulty],
                    examples=viva_examples,
                )
        # ── Footer ──────────────────────────────────────────────────────
        gr.HTML("""
        <div style="text-align: center; padding: 20px; margin-top: 20px; border-top: 1px solid #e0e0e0; color: #666; font-size: 0.85em;">
            <p><strong>Bioinformatics with BB Tutor</strong> – Educational AI Assistant</p>
            <p>⚠️ For educational purposes only. Not for clinical use.</p>
            <p>RNA-seq · Exome · Genome · Microbiome · Variants · Molecular Genetics · scRNA-seq · ATAC-seq · ChIP-seq · Methylation · Small RNA · Targeted Panels · Long-read · Spatial · Multi-omics</p>
        </div>
        """)

    return demo

if __name__ == "__main__":
    demo = build_app()
    demo.launch(server_name="0.0.0.0", server_port=7860, share=False)