Babajaan committed on
Commit
6e8bd4c
·
verified ·
1 Parent(s): fe2b396

Add complete app.py with all 7 modules

Browse files
Files changed (1) hide show
  1. app.py +1089 -0
app.py ADDED
@@ -0,0 +1,1089 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bioinformatics with BB Tutor — Complete Application
3
+ A production-oriented bioinformatics teaching assistant with 7 modules.
4
+ """
5
+
6
+ import gradio as gr
7
+ import numpy as np
8
+ import json
9
+ import os
10
+ import re
11
+ import time
12
+ import hashlib
13
+ from pathlib import Path
14
+
15
+ # ── Conditional imports with fallbacks ────────────────────────────────────────
16
+ try:
17
+ import fitz # PyMuPDF
18
+ HAS_FITZ = True
19
+ except ImportError:
20
+ HAS_FITZ = False
21
+
22
+ try:
23
+ from sentence_transformers import SentenceTransformer
24
+ HAS_ST = True
25
+ except ImportError:
26
+ HAS_ST = False
27
+
28
+ try:
29
+ from huggingface_hub import InferenceClient
30
+ HAS_HF = True
31
+ except ImportError:
32
+ HAS_HF = False
33
+
34
+ try:
35
+ import pandas as pd
36
+ HAS_PANDAS = True
37
+ except ImportError:
38
+ HAS_PANDAS = False
39
+
40
+ # ── Import knowledge base ────────────────────────────────────────────────────
41
+ from knowledge_base import (
42
+ DOMAIN_TAXONOMY, WORKFLOWS, GLOSSARY, COMMON_MISCONCEPTIONS,
43
+ SYSTEM_PROMPTS, QUIZ_TEMPLATES, LESSON_TEMPLATE,
44
+ TOPIC_CHOICES, DIFFICULTY_LEVELS, WORKFLOW_CHOICES
45
+ )
46
+
47
+
48
+ # ============================================================================
49
+ # CONFIGURATION
50
+ # ============================================================================
51
+
52
# LLM / embedding settings. The chat model can be overridden through the
# LLM_MODEL environment variable; HF_TOKEN stays None when it is not set.
LLM_MODEL = os.getenv("LLM_MODEL", "mistralai/Mistral-7B-Instruct-v0.3")
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
HF_TOKEN = os.getenv("HF_TOKEN")

# Retrieval settings (chunk sizes are counted in whitespace-separated words).
CHUNK_SIZE = 400        # words per chunk
CHUNK_OVERLAP = 60      # words shared by consecutive chunks
TOP_K_RETRIEVAL = 3     # default number of retrieved chunks
61
+
62
+
63
+ # ============================================================================
64
+ # BACKEND SERVICES
65
+ # ============================================================================
66
+
67
class LLMService:
    """LLM inference service backed by the HuggingFace Inference API.

    The class does not enforce singleton behaviour itself; the module simply
    creates one shared instance. When no client could be created (library
    missing, no token, or construction failed) every request is answered
    from the static knowledge base instead.
    """

    # Words shorter than this are ignored when keyword-matching knowledge
    # base entries against the user's message, so that filler words such as
    # "of" or "the" inside a multi-word term do not match every question.
    _MIN_KEYWORD_LEN = 5

    def __init__(self):
        """Create the inference client if the library and a token are present."""
        self.client = None
        if HAS_HF and HF_TOKEN:
            try:
                self.client = InferenceClient(
                    model=LLM_MODEL,
                    token=HF_TOKEN,
                    timeout=120,
                )
            except Exception as e:
                # Best effort: the app keeps running in knowledge-base mode.
                print(f"Warning: Could not initialize InferenceClient: {e}")

    def is_available(self):
        """Return True when an inference client was successfully created."""
        return self.client is not None

    def stream_chat(self, messages, temperature=0.7, max_tokens=1024):
        """Stream a chat completion.

        Yields the accumulated response text after every received chunk.
        Without a client a single knowledge-base fallback answer is yielded;
        on an API failure a single error message is yielded.
        """
        if not self.is_available():
            yield self._fallback_response(messages)
            return

        try:
            partial = ""
            for chunk in self.client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=0.9,
                stream=True,
            ):
                # Some stream chunks carry no choices at all; indexing them
                # would abort the whole answer with an IndexError.
                if not chunk.choices:
                    continue
                token = chunk.choices[0].delta.content or ""
                partial += token
                yield partial
        except Exception as e:
            yield f"⚠️ LLM API error: {str(e)}\n\nPlease check that HF_TOKEN is set correctly in the Space settings and the model {LLM_MODEL} is accessible."

    def generate(self, messages, temperature=0.7, max_tokens=1024):
        """Run a non-streaming completion and return the full response text.

        Always returns a string: callers concatenate the result, so a
        missing (None) completion content is normalised to "".
        """
        if not self.is_available():
            return self._fallback_response(messages)

        try:
            response = self.client.chat_completion(
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                top_p=0.9,
                stream=False,
            )
            return response.choices[0].message.content or ""
        except Exception as e:
            return f"⚠️ LLM API error: {str(e)}"

    @classmethod
    def _mentions_keyword(cls, text, phrase):
        """Return True if any sufficiently long word of *phrase* occurs in *text*."""
        return any(
            word in text
            for word in phrase.lower().split()
            if len(word) >= cls._MIN_KEYWORD_LEN
        )

    def _fallback_response(self, messages):
        """Answer from the static knowledge base when no LLM is available.

        The most recent user message is matched against glossary terms
        (all hits), workflows (first hit, first three steps) and common
        misconceptions (first hit). If nothing matches, setup instructions
        are returned instead.
        """
        user_msg = ""
        for m in reversed(messages):
            if m["role"] == "user":
                user_msg = m["content"].lower()
                break

        response_parts = []

        # Glossary: the full term always counts; single words only when they
        # are long enough to be meaningful.
        for term, definition in GLOSSARY.items():
            if term.lower() in user_msg or self._mentions_keyword(user_msg, term):
                response_parts.append(f"**{term}**: {definition}")

        # Workflows: only the first matching workflow is summarised.
        for wf in WORKFLOWS.values():
            if self._mentions_keyword(user_msg, wf["name"]):
                response_parts.append(f"\n### {wf['name']}\n")
                for step in wf["steps"][:3]:
                    response_parts.append(f"**Step {step['step']}: {step['name']}**\n{step['description']}")
                break

        # Misconceptions: only the first matching one is reported.
        for misc in COMMON_MISCONCEPTIONS:
            if self._mentions_keyword(user_msg, misc["misconception"]):
                response_parts.append(f"\n⚠️ **Common Misconception**: {misc['misconception']}\n\nβœ… **Correction**: {misc['correction']}")
                break

        if response_parts:
            return "πŸ“š *Responding from knowledge base (LLM not configured):*\n\n" + "\n\n".join(response_parts)
        return (
            "⚠️ **LLM is not configured.** To enable AI-powered responses:\n\n"
            "1. Go to Space Settings β†’ Repository Secrets\n"
            "2. Add `HF_TOKEN` with your HuggingFace API token\n"
            "3. The token needs access to inference API\n\n"
            "Currently showing knowledge base results only. "
            "Try asking about specific topics like 'DESeq2', 'variant calling', or 'FASTQ quality'."
        )
165
+
166
+
167
class RAGService:
    """Retrieval over the built-in knowledge base and user-uploaded chunks.

    Uses sentence-transformer embeddings when the model could be loaded and
    falls back to keyword overlap otherwise. Both paths consider uploaded
    chunks when they are supplied.
    """

    # Similarity floor below which an embedding match is discarded.
    _MIN_SCORE = 0.2
    # Metadata attached to every hit that comes from an uploaded document.
    _USER_METADATA = {"source": "uploaded_document", "type": "user_content"}

    def __init__(self):
        """Load the embedder (if available) and index the knowledge base."""
        self.embedder = None
        if HAS_ST:
            try:
                self.embedder = SentenceTransformer(EMBED_MODEL)
            except Exception as e:
                print(f"Warning: Could not load embedding model: {e}")

        # Pre-build knowledge base index
        self.kb_chunks, self.kb_metadata = self._build_kb_index()
        self.kb_embeddings = None
        if self.embedder and self.kb_chunks:
            try:
                self.kb_embeddings = self.embedder.encode(
                    self.kb_chunks,
                    convert_to_numpy=True,
                    normalize_embeddings=True,
                    show_progress_bar=False,
                    batch_size=32,
                )
            except Exception as e:
                print(f"Warning: Could not embed knowledge base: {e}")

    def _build_kb_index(self):
        """Build parallel lists of searchable text chunks and their metadata.

        Indexes glossary terms, workflow steps, misconceptions and the
        domain taxonomy, in that order.
        """
        chunks = []
        metadata = []

        # Index glossary terms
        for term, definition in GLOSSARY.items():
            chunks.append(f"{term}: {definition}")
            metadata.append({"source": "glossary", "topic": term, "type": "definition"})

        # Index workflow steps
        for wf_key, wf in WORKFLOWS.items():
            for step in wf["steps"]:
                step_text = f"{wf['name']} - Step {step['step']}: {step['name']}. {step['description']}"
                if step.get("tools"):
                    step_text += f" Tools: {', '.join(step['tools'])}."
                if step.get("common_mistakes"):
                    step_text += " Common mistakes: " + "; ".join(step["common_mistakes"])
                chunks.append(step_text)
                metadata.append({
                    "source": "workflow",
                    "topic": wf["domain"],
                    "type": "workflow_step",
                    "step": step["step"],
                    "workflow": wf_key,
                })

        # Index misconceptions
        for misc in COMMON_MISCONCEPTIONS:
            text = f"Misconception: {misc['misconception']} Correction: {misc['correction']}"
            chunks.append(text)
            metadata.append({
                "source": "misconception",
                "topic": misc["domain"],
                "type": "misconception",
                "severity": misc["severity"],
            })

        # Index domain taxonomy
        for key, domain in DOMAIN_TAXONOMY.items():
            text = f"{domain['name']} covers these subtopics: {', '.join(domain['subtopics'])}."
            chunks.append(text)
            metadata.append({"source": "taxonomy", "topic": key, "type": "domain_overview"})

        return chunks, metadata

    def search(self, query, top_k=TOP_K_RETRIEVAL, user_chunks=None, user_embeddings=None):
        """Return up to *top_k* hits for *query*, best first.

        Each hit is a dict with "text", "score" and "metadata" keys. Hits are
        drawn from the knowledge base and, when given, from *user_chunks*.
        Uploaded chunks are ranked by embedding similarity when
        *user_embeddings* has one row per chunk, and by keyword overlap
        otherwise (for example when embedding the upload failed). Any
        failure of the embedding path falls back to keyword search.
        """
        if not self.embedder:
            return self._keyword_search(query, top_k, user_chunks)

        try:
            query_embedding = self.embedder.encode(
                [query],
                convert_to_numpy=True,
                normalize_embeddings=True,
            )

            results = []

            # Search knowledge base
            if self.kb_embeddings is not None and len(self.kb_embeddings) > 0:
                kb_scores = np.dot(query_embedding, self.kb_embeddings.T)[0]
                for idx in np.argsort(kb_scores)[::-1][:top_k]:
                    if kb_scores[idx] > self._MIN_SCORE:
                        results.append({
                            "text": self.kb_chunks[idx],
                            "score": float(kb_scores[idx]),
                            "metadata": self.kb_metadata[idx],
                        })

            # Search user-uploaded content
            if user_chunks:
                # Embeddings are only trusted when they line up one-to-one
                # with the chunks; otherwise indices would point at the
                # wrong text (or past the end of the list).
                embeddings_usable = (
                    user_embeddings is not None
                    and len(user_embeddings) == len(user_chunks)
                )
                if embeddings_usable:
                    user_scores = np.dot(query_embedding, user_embeddings.T)[0]
                    for idx in np.argsort(user_scores)[::-1][:top_k]:
                        if user_scores[idx] > self._MIN_SCORE:
                            results.append({
                                "text": user_chunks[idx],
                                "score": float(user_scores[idx]),
                                "metadata": dict(self._USER_METADATA),
                            })
                else:
                    results.extend(self._keyword_matches(query, user_chunks))

            # Sort by score and return top_k
            results.sort(key=lambda x: x["score"], reverse=True)
            return results[:top_k]

        except Exception as e:
            print(f"Embedding search error: {e}")
            return self._keyword_search(query, top_k, user_chunks)

    @staticmethod
    def _tokenize(text):
        """Return the set of lower-cased word tokens in *text*.

        Punctuation is stripped so that "DESeq2?" in a question matches the
        "DESeq2:" that starts a glossary chunk.
        """
        return set(re.findall(r"\w+", text.lower()))

    @classmethod
    def _keyword_matches(cls, query, chunks, metadata=None):
        """Score *chunks* by the fraction of query words they contain.

        *metadata* is a list parallel to *chunks*; when omitted the chunks
        are treated as uploaded-document content. Chunks sharing no word
        with the query are left out. The result is not sorted.
        """
        query_words = cls._tokenize(query)
        if not query_words:
            return []
        matches = []
        for i, chunk in enumerate(chunks):
            overlap = len(query_words & cls._tokenize(chunk))
            if overlap > 0:
                matches.append({
                    "text": chunk,
                    "score": overlap / len(query_words),
                    "metadata": metadata[i] if metadata is not None else dict(cls._USER_METADATA),
                })
        return matches

    def _keyword_search(self, query, top_k=3, user_chunks=None):
        """Fallback keyword search over the knowledge base and uploaded chunks."""
        scored = self._keyword_matches(query, self.kb_chunks, self.kb_metadata)
        if user_chunks:
            scored.extend(self._keyword_matches(query, user_chunks))
        scored.sort(key=lambda x: x["score"], reverse=True)
        return scored[:top_k]

    def embed_chunks(self, chunks):
        """Embed a list of text chunks.

        Returns the embedder's output, or None when there is no embedder,
        no chunks, or embedding fails (the failure is logged).
        """
        if not self.embedder or not chunks:
            return None
        try:
            return self.embedder.encode(
                chunks,
                convert_to_numpy=True,
                normalize_embeddings=True,
                show_progress_bar=False,
                batch_size=32,
            )
        except Exception as e:
            # Logged like the other embedding failures in this class rather
            # than being swallowed silently.
            print(f"Warning: Could not embed uploaded chunks: {e}")
            return None
315
+
316
+
317
class DocumentParser:
    """Turn uploaded files into raw text plus overlapping word chunks."""

    # Extensions that are read as plain UTF-8 text.
    _TEXT_EXTENSIONS = frozenset({
        ".txt", ".md", ".csv", ".tsv", ".fasta", ".fa", ".fastq", ".fq",
        ".vcf", ".bed", ".gff", ".gtf", ".sam",
    })

    @staticmethod
    def parse_file(filepath):
        """Extract text from an uploaded file.

        Returns a ``(text, chunks)`` tuple. On any problem (unsupported
        type, missing PDF backend, parse error) ``text`` holds a
        human-readable message and ``chunks`` is empty; nothing is raised.
        """
        if filepath is None:
            return "", []

        filepath = str(filepath)
        ext = Path(filepath).suffix.lower()

        try:
            if ext == ".pdf":
                if not HAS_FITZ:
                    # PDFs are a supported type; the optional backend is
                    # what is missing, so say that instead of "unsupported".
                    return "PDF support is unavailable: PyMuPDF (fitz) is not installed.", []
                return DocumentParser._parse_pdf(filepath)
            if ext in DocumentParser._TEXT_EXTENSIONS:
                return DocumentParser._parse_text(filepath)
            return f"Unsupported file type: {ext}", []
        except Exception as e:
            return f"Error parsing file: {str(e)}", []

    @staticmethod
    def _parse_pdf(filepath):
        """Return ``(full_text, chunks)`` for a PDF, skipping blank pages."""
        doc = fitz.open(filepath)
        try:
            pages = []
            for page_num in range(len(doc)):
                text = doc[page_num].get_text()
                if text.strip():
                    pages.append(text)
        finally:
            # Release the file handle even when text extraction fails.
            doc.close()
        full_text = "\n\n".join(pages)
        return full_text, DocumentParser._chunk_text(full_text)

    @staticmethod
    def _parse_text(filepath):
        """Return ``(text, chunks)`` for a text file; bad bytes are replaced."""
        with open(filepath, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()
        return text, DocumentParser._chunk_text(text)

    @staticmethod
    def _chunk_text(text, chunk_size=CHUNK_SIZE, overlap=CHUNK_OVERLAP):
        """Split *text* into chunks of at most *chunk_size* words.

        Consecutive chunks share *overlap* words. Text that already fits in
        one chunk is returned unchanged as a single chunk; blank text gives
        an empty list.
        """
        words = text.split()
        if len(words) <= chunk_size:
            return [text] if text.strip() else []

        # An overlap >= chunk_size would give a zero or negative stride.
        step = max(chunk_size - overlap, 1)
        chunks = []
        for start in range(0, len(words), step):
            chunk = " ".join(words[start:start + chunk_size])
            if chunk.strip():
                chunks.append(chunk)
            # Once a window reaches the last word, any further window would
            # only repeat words the current chunk already contains.
            if start + chunk_size >= len(words):
                break
        return chunks
371
+
372
+
373
+ # ============================================================================
374
+ # INITIALIZE SERVICES
375
+ # ============================================================================
376
+
377
# Module-level service instances shared by every handler below.
print("🧬 Initializing BB Tutor services...")
llm_service = LLMService()
rag_service = RAGService()
doc_parser = DocumentParser()
# One call, one line per status message.
print(
    f" LLM available: {llm_service.is_available()}",
    f" RAG embedder available: {rag_service.embedder is not None}",
    f" Knowledge base chunks: {len(rag_service.kb_chunks)}",
    "βœ… BB Tutor services initialized!",
    sep="\n",
)
385
+
386
+
387
+ # ============================================================================
388
+ # MODULE 1: ASK THE TUTOR
389
+ # ============================================================================
390
+
391
def tutor_respond(message, history, system_prompt, temperature, max_tokens, rag_store):
    """Stream a tutor answer augmented with retrieved context.

    Yields the growing response text. A blank message yields a single
    empty string. Retrieved chunks are passed to the model as a second
    system message, each prefixed with its source.
    """
    if not message.strip():
        yield ""
        return

    # The store may be something other than a dict; treat that as "no upload".
    store_is_dict = isinstance(rag_store, dict)
    uploaded_chunks = rag_store.get("chunks", []) if store_is_dict else []
    uploaded_vectors = rag_store.get("embeddings") if store_is_dict else None

    hits = rag_service.search(
        message,
        top_k=TOP_K_RETRIEVAL,
        user_chunks=uploaded_chunks,
        user_embeddings=uploaded_vectors,
    )

    conversation = [{"role": "system", "content": system_prompt}]
    if hits:
        context_lines = ["RELEVANT KNOWLEDGE BASE CONTEXT:"]
        context_lines.extend(
            f"[Source: {hit['metadata'].get('source', 'unknown')}] {hit['text']}"
            for hit in hits
        )
        conversation.append({"role": "system", "content": "\n".join(context_lines)})

    conversation.extend(history)
    conversation.append({"role": "user", "content": message})

    yield from llm_service.stream_chat(
        conversation, temperature=temperature, max_tokens=max_tokens
    )
433
+
434
+
435
+ # ============================================================================
436
+ # MODULE 2: UPLOAD AND EXPLAIN
437
+ # ============================================================================
438
+
439
def process_upload(file, rag_store):
    """Parse, chunk, embed and explain an uploaded file.

    Returns ``(explanation_markdown, raw_text_preview, rag_store)``. The
    store is returned unchanged when there is no file or no text could be
    extracted; otherwise a copy holding the new chunks and their embeddings
    is returned.
    """
    if file is None:
        return "Please upload a file first.", "", rag_store

    full_text, chunks = doc_parser.parse_file(file)

    if not chunks:
        return "Could not extract text from the uploaded file.", full_text[:2000] if full_text else "", rag_store

    # Embed the chunks (None when no embedder is available or it failed).
    embeddings = rag_service.embed_chunks(chunks)

    # Update RAG store with uploaded content. The embeddings are always
    # overwritten, even with None: keeping the previous document's
    # embeddings next to the new chunks would make similarity search index
    # into the wrong text.
    new_store = dict(rag_store) if isinstance(rag_store, dict) else {}
    new_store["chunks"] = chunks
    new_store["embeddings"] = embeddings

    # Only the start of the document is sent for explanation.
    preview = full_text[:3000]
    messages = [
        {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
        {"role": "user", "content": f"Please analyze and explain this uploaded content:\n\n{preview}"},
    ]
    explanation = llm_service.generate(messages, temperature=0.5, max_tokens=1500)

    # Add stats
    stats = f"πŸ“Š **Document Stats:** {len(chunks)} chunks, ~{len(full_text.split())} words extracted\n\n---\n\n"

    return stats + explanation, full_text[:5000], new_store
470
+
471
+
472
def upload_chat_respond(message, history, rag_store):
    """Stream an answer to a question about the uploaded document.

    Yields the growing response text. A blank message yields an empty
    string, and a prompt to upload is yielded when the store holds no
    chunks. Retrieved text is split by origin so that knowledge-base hits
    are never presented to the model as document content.
    """
    if not message.strip():
        yield ""
        return

    user_chunks = rag_store.get("chunks", []) if isinstance(rag_store, dict) else []
    user_embeddings = rag_store.get("embeddings") if isinstance(rag_store, dict) else None

    if not user_chunks:
        yield "Please upload a document first using the upload panel above, then ask questions about it."
        return

    # Retrieval may return both document chunks and knowledge-base chunks.
    rag_results = rag_service.search(
        message, top_k=4,
        user_chunks=user_chunks,
        user_embeddings=user_embeddings,
    )

    document_texts = []
    kb_texts = []
    for r in rag_results:
        if r["metadata"].get("source") == "uploaded_document":
            document_texts.append(r["text"])
        else:
            kb_texts.append(r["text"])

    if not document_texts:
        # Nothing from the document was retrieved (e.g. no embeddings);
        # fall back to its opening chunks so the model still sees it.
        document_texts = list(user_chunks[:4])

    context = "CONTEXT FROM UPLOADED DOCUMENT:\n" + "".join(
        f"\n{text}\n" for text in document_texts
    )
    if kb_texts:
        context += "\nSUPPLEMENTARY KNOWLEDGE BASE CONTEXT (not from the uploaded document):\n" + "".join(
            f"\n{text}\n" for text in kb_texts
        )

    messages = [
        {"role": "system", "content": SYSTEM_PROMPTS["upload_explain"]},
        {"role": "system", "content": context},
    ]
    messages.extend(history)
    messages.append({"role": "user", "content": message})

    yield from llm_service.stream_chat(messages, temperature=0.5, max_tokens=1024)
506
+
507
+
508
+ # ============================================================================
509
+ # MODULE 3: QUIZ ME
510
+ # ============================================================================
511
+
512
def generate_quiz(topic, quiz_type, num_questions, difficulty, rag_store):
    """Create a quiz for *topic*.

    Returns ``(formatted_markdown, answer_key)``; the answer key is the raw
    model response. With no topic a prompt message and an empty key are
    returned. ``rag_store`` is accepted but not used.
    """
    if not topic:
        return "Please select or enter a topic first.", ""

    # Reference material from the knowledge base, if any was found.
    hits = rag_service.search(topic, top_k=3)
    reference = ""
    if hits:
        reference = "Use this reference material:\n" + "\n".join(hit["text"] for hit in hits)

    # Unknown quiz types fall back to multiple choice.
    template_by_type = {
        "Multiple Choice (MCQ)": "mcq",
        "True/False": "true_false",
        "Short Answer": "short_answer",
    }
    template = QUIZ_TEMPLATES[template_by_type.get(quiz_type, "mcq")]
    quiz_prompt = template.format(
        n=int(num_questions),
        topic=topic,
        difficulty=difficulty,
    )

    conversation = [{"role": "system", "content": SYSTEM_PROMPTS["quiz_me"]}]
    if reference:
        conversation.append({"role": "system", "content": reference})
    conversation.append({"role": "user", "content": quiz_prompt})

    response = llm_service.generate(conversation, temperature=0.8, max_tokens=2000)

    # Heading, metadata line, then the model output.
    formatted = (
        f"## 🧠 {topic} Quiz β€” {difficulty}\n\n"
        f"*Type: {quiz_type} | Questions: {int(num_questions)}*\n\n---\n\n"
        + response
    )

    return formatted, response
553
+
554
+
555
def check_quiz_answers(user_answers, answer_key):
    """Ask the LLM to grade *user_answers* against *answer_key*.

    Returns the grading text, or a prompt message when the answers are
    blank or no quiz has been generated yet.
    """
    if not user_answers.strip():
        return "Please enter your answers first."
    if not answer_key:
        return "Please generate a quiz first."

    grader_instructions = (
        "You are a bioinformatics tutor grading a quiz. "
        "Compare the student's answers to the correct answers. "
        "For each answer: mark it βœ… correct or ❌ incorrect, explain why, "
        "and provide the correct answer if wrong. "
        "Be encouraging but accurate. Give a final score."
    )
    submission = f"QUIZ AND ANSWER KEY:\n{answer_key}\n\nSTUDENT'S ANSWERS:\n{user_answers}\n\nPlease grade each answer:"

    conversation = [
        {"role": "system", "content": grader_instructions},
        {"role": "user", "content": submission},
    ]
    return llm_service.generate(conversation, temperature=0.3, max_tokens=1500)
568
+
569
+
570
+ # ============================================================================
571
+ # MODULE 4: BUILD A LESSON
572
+ # ============================================================================
573
+
574
def generate_lesson(topic, level, include_exercises, include_quiz):
    """Produce a structured lesson for *topic* at the given *level*.

    Returns the model's lesson text, or a prompt message when no topic is
    given. Exercises and a self-assessment quiz are requested on demand.
    """
    if not topic:
        return "Please select or enter a topic."

    # Reference material from the knowledge base, if any was found.
    hits = rag_service.search(topic, top_k=4)
    reference = ""
    if hits:
        reference = "Reference material:\n" + "\n".join(hit["text"] for hit in hits)

    request_parts = [LESSON_TEMPLATE.format(topic=topic, level=level)]
    if include_exercises:
        request_parts.append("\n\nInclude 2-3 practical exercises with clear instructions.")
    if include_quiz:
        request_parts.append("\n\nInclude a 5-question self-assessment quiz at the end (with answers).")

    conversation = [{"role": "system", "content": SYSTEM_PROMPTS["build_lesson"]}]
    if reference:
        conversation.append({"role": "system", "content": reference})
    conversation.append({"role": "user", "content": "".join(request_parts)})

    return llm_service.generate(conversation, temperature=0.7, max_tokens=3000)
600
+
601
+
602
+ # ============================================================================
603
+ # MODULE 5: WORKFLOW COACH
604
+ # ============================================================================
605
+
606
def workflow_respond(message, history, selected_workflow, temperature):
    """Stream a workflow-coaching answer.

    Yields the growing response text; a blank message yields an empty
    string. The reference steps of the selected workflow (if one is
    selected and recognised) plus retrieved knowledge-base chunks are sent
    to the model as a system message.
    """
    if not message.strip():
        yield ""
        return

    # Get workflow context. An empty or missing selection must not match:
    # "" is a substring of every workflow name, which would otherwise
    # silently inject the first workflow.
    workflow_context = ""
    selection = (selected_workflow or "").strip()
    if selection:
        selection_lower = selection.lower()
        for wf in WORKFLOWS.values():
            if wf["name"] in selection or selection_lower in wf["name"].lower():
                parts = [f"WORKFLOW REFERENCE: {wf['name']}\n\n"]
                for step in wf["steps"]:
                    parts.append(f"Step {step['step']}: {step['name']}\n")
                    parts.append(f" Description: {step['description']}\n")
                    parts.append(f" Tools: {', '.join(step.get('tools', []))}\n")
                    if step.get("common_mistakes"):
                        parts.append(f" Common mistakes: {'; '.join(step['common_mistakes'])}\n")
                    parts.append("\n")
                workflow_context = "".join(parts)
                break  # only the first matching workflow is used

    # Also search RAG
    rag_results = rag_service.search(message, top_k=2)
    if rag_results:
        workflow_context += "\nADDITIONAL CONTEXT:\n" + "\n".join(r["text"] for r in rag_results)

    messages = [
        {"role": "system", "content": SYSTEM_PROMPTS["workflow_coach"]},
    ]
    if workflow_context:
        messages.append({"role": "system", "content": workflow_context})

    messages.extend(history)
    messages.append({"role": "user", "content": message})

    yield from llm_service.stream_chat(messages, temperature=temperature, max_tokens=1500)
643
+
644
+
645
+ # ============================================================================
646
+ # MODULE 6: PAPER TO LESSON
647
+ # ============================================================================
648
+
649
def paper_to_lesson_respond(message, history, output_format, rag_store):
    """Stream teaching material derived from the uploaded paper.

    Yields the growing response text; a blank message yields an empty
    string. Retrieved text is split by origin so that knowledge-base hits
    are not presented to the model as paper content. Unknown output
    formats default to a lesson plan.
    """
    if not message.strip():
        yield ""
        return

    user_chunks = rag_store.get("chunks", []) if isinstance(rag_store, dict) else []
    user_embeddings = rag_store.get("embeddings") if isinstance(rag_store, dict) else None

    context = ""
    if user_chunks:
        # Retrieval may return both paper chunks and knowledge-base chunks.
        rag_results = rag_service.search(
            message, top_k=4,
            user_chunks=user_chunks,
            user_embeddings=user_embeddings,
        )
        paper_texts = []
        kb_texts = []
        for r in rag_results:
            if r["metadata"].get("source") == "uploaded_document":
                paper_texts.append(r["text"])
            else:
                kb_texts.append(r["text"])

        if not paper_texts:
            # Nothing from the paper was retrieved (e.g. no embeddings);
            # fall back to its opening chunks so the model still sees it.
            paper_texts = list(user_chunks[:4])

        context = "PAPER CONTENT:\n" + "\n".join(paper_texts)
        if kb_texts:
            context += "\n\nSUPPLEMENTARY KNOWLEDGE BASE CONTEXT (not from the paper):\n" + "\n".join(kb_texts)

    format_instruction = {
        "Lesson Plan": "Create a structured lesson plan with learning objectives, sections, and exercises.",
        "Slide Outline": "Create a slide-by-slide outline with key points for each slide (title + 3-5 bullet points per slide).",
        "Study Notes": "Create concise study notes highlighting key methods, tools, and findings.",
        "Quiz Questions": "Generate 5-10 quiz questions based on the paper's methods and findings.",
    }.get(output_format, "Create a structured lesson plan.")

    messages = [
        {"role": "system", "content": SYSTEM_PROMPTS["paper_to_lesson"]},
    ]
    if context:
        messages.append({"role": "system", "content": context})

    messages.extend(history)

    full_message = f"{message}\n\nOUTPUT FORMAT: {format_instruction}"
    messages.append({"role": "user", "content": full_message})

    yield from llm_service.stream_chat(messages, temperature=0.7, max_tokens=2500)
689
+
690
+
691
+ # ============================================================================
692
+ # MODULE 7: VIVA PRACTICE
693
+ # ============================================================================
694
+
695
def viva_respond(message, history, topic, difficulty):
    """Stream the examiner's next turn in a viva practice session.

    Yields the growing response text; a blank message yields an empty
    string. The topic, difficulty and any retrieved reference material are
    sent to the model as a second system message.
    """
    if not message.strip():
        yield ""
        return

    # Retrieve on topic and answer together.
    hits = rag_service.search(f"{topic} {message}", top_k=3)
    reference = ("REFERENCE MATERIAL:\n" + "\n".join(hit["text"] for hit in hits)) if hits else ""

    conversation = [
        {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]},
        {"role": "system", "content": f"VIVA TOPIC: {topic}\nDIFFICULTY LEVEL: {difficulty}\n\n{reference}"},
    ]
    conversation.extend(history)
    conversation.append({"role": "user", "content": message})

    yield from llm_service.stream_chat(conversation, temperature=0.7, max_tokens=1000)
718
+
719
+
720
def start_viva(topic, difficulty):
    """Return the examiner's opening question for a viva on *topic*.

    Returns a prompt message instead when no topic is selected.
    """
    if not topic:
        return "Please select a topic to begin the viva."

    hits = rag_service.search(topic, top_k=2)
    reference = "\n".join(hit["text"] for hit in hits) if hits else ""

    opening_request = f"I'm ready for my viva on {topic}. Please start with your first question."
    conversation = [
        {"role": "system", "content": SYSTEM_PROMPTS["viva_practice"]},
        {"role": "system", "content": f"Topic: {topic}\nDifficulty: {difficulty}\n\nReference: {reference}"},
        {"role": "user", "content": opening_request},
    ]

    return llm_service.generate(conversation, temperature=0.7, max_tokens=500)
737
+
738
+
739
+ # ============================================================================
740
+ # GRADIO APP ASSEMBLY
741
+ # ============================================================================
742
+
743
# Custom CSS for the Gradio UI: styles the page header (.main-header), the
# per-tab info boxes (.module-info) and the educational-use disclaimer
# (.safety-notice) that the HTML snippets in build_app() refer to by class.
# NOTE(review): the visible gr.Blocks(...) call does not pass css=CUSTOM_CSS;
# confirm the stylesheet is applied elsewhere (e.g. at launch).
CUSTOM_CSS = """
.main-header {
text-align: center;
padding: 20px;
background: linear-gradient(135deg, #1a5276 0%, #2e86c1 50%, #48c9b0 100%);
border-radius: 12px;
margin-bottom: 20px;
color: white;
}
.main-header h1 { color: white; font-size: 2em; margin-bottom: 5px; }
.main-header p { color: #ecf0f1; font-size: 1.1em; }
.module-info {
background: #f0f9ff;
border-left: 4px solid #2e86c1;
padding: 12px 16px;
margin-bottom: 16px;
border-radius: 0 8px 8px 0;
}
.safety-notice {
background: #fff3e0;
border-left: 4px solid #f39c12;
padding: 10px 14px;
margin-top: 10px;
border-radius: 0 8px 8px 0;
font-size: 0.9em;
}
"""
771
+
772
+ def build_app():
773
+ with gr.Blocks(title="Bioinformatics with BB Tutor") as demo:
774
+
775
+ # Shared state across all tabs
776
+ rag_store = gr.State({"chunks": [], "embeddings": None})
777
+
778
+ # ── Header ────────────────────────────────────────────────────────
779
+ gr.HTML("""
780
+ <div class="main-header">
781
+ <h1>🧬 Bioinformatics with BB Tutor</h1>
782
+ <p>Your AI-powered bioinformatics teaching assistant</p>
783
+ <p style="font-size: 0.85em; opacity: 0.9;">
784
+ RNA-seq Β· Exome Β· Genome Β· Microbiome Β· Variants Β· Molecular Genetics Β· scRNA-seq Β· ATAC-seq Β· ChIP-seq Β· and more
785
+ </p>
786
+ </div>
787
+ """)
788
+
789
+ with gr.Tabs():
790
+
791
+ # ══════════════════════════════════════════════════════════════
792
+ # TAB 1: ASK THE TUTOR
793
+ # ══════════════════════════════════════════════════════════════
794
+ with gr.Tab("🧬 Ask the Tutor", id="ask"):
795
+ gr.HTML('<div class="module-info">πŸ’‘ Ask any bioinformatics question. The tutor uses a curated knowledge base to provide accurate, educational answers with proper context.</div>')
796
+
797
+ gr.ChatInterface(
798
+ fn=tutor_respond,
799
+ type="messages",
800
+ additional_inputs=[
801
+ gr.Textbox(
802
+ value=SYSTEM_PROMPTS["ask_tutor"],
803
+ label="System Prompt",
804
+ lines=3,
805
+ visible=True,
806
+ ),
807
+ gr.Slider(
808
+ minimum=0.1, maximum=1.5, value=0.7, step=0.1,
809
+ label="Temperature (lower = more focused, higher = more creative)"
810
+ ),
811
+ gr.Slider(
812
+ minimum=256, maximum=4096, value=1024, step=256,
813
+ label="Max Response Length (tokens)"
814
+ ),
815
+ rag_store,
816
+ ],
817
+ additional_inputs_accordion=gr.Accordion("βš™οΈ Advanced Settings", open=False),
818
+ examples=[
819
+ "What is the difference between DESeq2 and edgeR for differential expression analysis?",
820
+ "Explain the GATK Best Practices variant calling pipeline step by step.",
821
+ "What is the difference between alpha and beta diversity in microbiome analysis?",
822
+ "Why should I use adjusted p-values instead of raw p-values?",
823
+ "Explain the single-cell RNA-seq analysis workflow from raw data to cell type annotation.",
824
+ "What is BQSR and why is it important in variant calling?",
825
+ ],
826
+ save_history=True,
827
+ )
828
+
829
+ gr.HTML('<div class="safety-notice">⚠️ <strong>Educational use only.</strong> This tutor provides learning support, not clinical interpretations. Always consult qualified professionals for clinical genomics decisions.</div>')
830
+
831
+ # ══════════════════════════════════════════════════════════════
832
+ # TAB 2: UPLOAD AND EXPLAIN
833
+ # ══════════════════════════════════════════════════════════════
834
+ with gr.Tab("πŸ“„ Upload & Explain", id="upload"):
835
+ gr.HTML('<div class="module-info">πŸ“„ Upload bioinformatics documents (PDFs, text files, VCFs, FASTA, etc.) and get AI-powered explanations. Uploaded content becomes available for Q&A across all modules.</div>')
836
+
837
+ with gr.Row():
838
+ with gr.Column(scale=1):
839
+ file_input = gr.File(
840
+ label="Upload Document",
841
+ file_types=[".pdf", ".txt", ".md", ".csv", ".tsv",
842
+ ".fasta", ".fa", ".fastq", ".vcf", ".bed",
843
+ ".gff", ".gtf", ".sam"],
844
+ file_count="single",
845
+ type="filepath",
846
+ )
847
+ process_btn = gr.Button("πŸ” Analyze Document", variant="primary", size="lg")
848
+
849
+ with gr.Column(scale=2):
850
+ explanation_output = gr.Markdown(label="Analysis & Explanation")
851
+
852
+ with gr.Accordion("πŸ“ Raw Extracted Text", open=False):
853
+ raw_text_output = gr.Textbox(label="Extracted Text", lines=10, show_copy_button=True)
854
+
855
+ process_btn.click(
856
+ fn=process_upload,
857
+ inputs=[file_input, rag_store],
858
+ outputs=[explanation_output, raw_text_output, rag_store],
859
+ )
860
+
861
+ gr.Markdown("### πŸ’¬ Ask Questions About Your Document")
862
+ gr.ChatInterface(
863
+ fn=upload_chat_respond,
864
+ type="messages",
865
+ additional_inputs=[rag_store],
866
+ additional_inputs_accordion=gr.Accordion("", open=False, visible=False),
867
+ examples=[
868
+ "Summarize the key methods used in this paper.",
869
+ "What bioinformatics tools are mentioned?",
870
+ "Explain the main findings in simple terms.",
871
+ "What are the limitations of this analysis?",
872
+ ],
873
+ )
874
+
875
+ # ══════════════════════════════════════════════════════════════
876
+ # TAB 3: QUIZ ME
877
+ # ══════════════════════════════════════════════════════════════
878
+ with gr.Tab("❓ Quiz Me", id="quiz"):
879
+ gr.HTML('<div class="module-info">🧠 Test your knowledge with auto-generated quizzes. Choose a topic, format, and difficulty level.</div>')
880
+
881
+ with gr.Row():
882
+ with gr.Column(scale=2):
883
+ quiz_topic = gr.Dropdown(
884
+ choices=TOPIC_CHOICES,
885
+ label="Select Topic",
886
+ allow_custom_value=True,
887
+ value="RNA-seq: Differential Expression (DESeq2)"
888
+ )
889
+ with gr.Column(scale=1):
890
+ quiz_type = gr.Radio(
891
+ choices=["Multiple Choice (MCQ)", "True/False", "Short Answer"],
892
+ value="Multiple Choice (MCQ)",
893
+ label="Question Format"
894
+ )
895
+
896
+ with gr.Row():
897
+ with gr.Column(scale=1):
898
+ quiz_difficulty = gr.Radio(
899
+ choices=DIFFICULTY_LEVELS,
900
+ value="Intermediate",
901
+ label="Difficulty"
902
+ )
903
+ with gr.Column(scale=1):
904
+ num_questions = gr.Slider(
905
+ minimum=1, maximum=10, value=5, step=1,
906
+ label="Number of Questions"
907
+ )
908
+ with gr.Column(scale=1):
909
+ generate_quiz_btn = gr.Button("🎲 Generate Quiz", variant="primary", size="lg")
910
+
911
+ quiz_output = gr.Markdown(label="Generated Quiz")
912
+ answer_key_state = gr.State("")
913
+
914
+ generate_quiz_btn.click(
915
+ fn=generate_quiz,
916
+ inputs=[quiz_topic, quiz_type, num_questions, quiz_difficulty, rag_store],
917
+ outputs=[quiz_output, answer_key_state],
918
+ )
919
+
920
+ gr.Markdown("---")
921
+ gr.Markdown("### ✍️ Submit Your Answers")
922
+ user_answers = gr.Textbox(
923
+ label="Enter your answers (e.g., '1: A, 2: B, 3: True...')",
924
+ lines=5,
925
+ placeholder="Type your answers here..."
926
+ )
927
+ check_btn = gr.Button("βœ… Check Answers", variant="primary")
928
+ feedback_output = gr.Markdown(label="Feedback")
929
+
930
+ check_btn.click(
931
+ fn=check_quiz_answers,
932
+ inputs=[user_answers, answer_key_state],
933
+ outputs=[feedback_output],
934
+ )
935
+
936
+ # ══════════════════════════════════════════════════════════════
937
+ # TAB 4: BUILD A LESSON
938
+ # ══════════════════════════════════════════════════════════════
939
+ with gr.Tab("πŸ“š Build a Lesson", id="lesson"):
940
+ gr.HTML('<div class="module-info">πŸ“š Generate structured lessons with learning objectives, explanations, exercises, and quizzes for any bioinformatics topic.</div>')
941
+
942
+ with gr.Row():
943
+ with gr.Column(scale=2):
944
+ lesson_topic = gr.Dropdown(
945
+ choices=TOPIC_CHOICES,
946
+ label="Lesson Topic",
947
+ allow_custom_value=True,
948
+ value="RNA-seq: Differential Expression (DESeq2)"
949
+ )
950
+ with gr.Column(scale=1):
951
+ lesson_level = gr.Radio(
952
+ choices=DIFFICULTY_LEVELS,
953
+ value="Intermediate",
954
+ label="Student Level"
955
+ )
956
+
957
+ with gr.Row():
958
+ include_exercises = gr.Checkbox(label="Include Practical Exercises", value=True)
959
+ include_quiz = gr.Checkbox(label="Include Self-Assessment Quiz", value=True)
960
+ generate_lesson_btn = gr.Button("πŸ“ Generate Lesson", variant="primary", size="lg")
961
+
962
+ lesson_output = gr.Markdown(label="Generated Lesson")
963
+
964
+ generate_lesson_btn.click(
965
+ fn=generate_lesson,
966
+ inputs=[lesson_topic, lesson_level, include_exercises, include_quiz],
967
+ outputs=[lesson_output],
968
+ )
969
+
970
+ # ══════════════════════════════════════════════════════════════
971
+ # TAB 5: WORKFLOW COACH
972
+ # ══════════════════════════════════════════════════════════════
973
+ with gr.Tab("πŸ”¬ Workflow Coach", id="workflow"):
974
+ gr.HTML('<div class="module-info">πŸ”¬ Get step-by-step guidance through bioinformatics analysis pipelines. Select a workflow and ask questions about any step.</div>')
975
+
976
+ workflow_selector = gr.Dropdown(
977
+ choices=WORKFLOW_CHOICES,
978
+ label="Select Workflow",
979
+ value="Bulk RNA-seq: Full DE Analysis Pipeline",
980
+ allow_custom_value=True,
981
+ )
982
+
983
+ gr.ChatInterface(
984
+ fn=workflow_respond,
985
+ type="messages",
986
+ additional_inputs=[
987
+ workflow_selector,
988
+ gr.Slider(
989
+ minimum=0.1, maximum=1.5, value=0.7, step=0.1,
990
+ label="Temperature"
991
+ ),
992
+ ],
993
+ additional_inputs_accordion=gr.Accordion("βš™οΈ Settings", open=False),
994
+ examples=[
995
+ "Walk me through the complete pipeline from raw FASTQ to differential expression results.",
996
+ "I'm at the alignment step. What should I check before moving to counting?",
997
+ "My mapping rate is only 45%. What could be wrong?",
998
+ "How do I choose between STAR and HISAT2 for RNA-seq alignment?",
999
+ "What parameters should I use for GATK HaplotypeCaller on exome data?",
1000
+ "How do I set the truncation parameters for DADA2 in QIIME2?",
1001
+ ],
1002
+ )
1003
+
1004
+ # ══════════════════════════════════════════════════════════════
1005
+ # TAB 6: PAPER TO LESSON
1006
+ # ══════════════════════════════════════════════════════════════
1007
+ with gr.Tab("πŸ“° Paper to Lesson", id="paper"):
1008
+ gr.HTML('<div class="module-info">πŸ“° Convert research papers into teaching material. Upload a paper first in the "Upload & Explain" tab, then use this module to generate lessons, slide outlines, and quiz questions from it.</div>')
1009
+
1010
+ output_format = gr.Radio(
1011
+ choices=["Lesson Plan", "Slide Outline", "Study Notes", "Quiz Questions"],
1012
+ value="Lesson Plan",
1013
+ label="Output Format"
1014
+ )
1015
+
1016
+ gr.ChatInterface(
1017
+ fn=paper_to_lesson_respond,
1018
+ type="messages",
1019
+ additional_inputs=[
1020
+ output_format,
1021
+ rag_store,
1022
+ ],
1023
+ additional_inputs_accordion=gr.Accordion("", open=False, visible=False),
1024
+ examples=[
1025
+ "Convert this paper into a 45-minute lecture plan.",
1026
+ "Create a slide outline covering the key methods in this paper.",
1027
+ "Generate study notes highlighting the bioinformatics methods used.",
1028
+ "Create quiz questions testing understanding of this paper's methodology.",
1029
+ ],
1030
+ )
1031
+
1032
+ # ══════════════════════════════════════════════════════════════
1033
+ # TAB 7: VIVA PRACTICE
1034
+ # ══════════════════════════════════════════════════════════════
1035
+ with gr.Tab("πŸŽ“ Viva Practice", id="viva"):
1036
+ gr.HTML('<div class="module-info">πŸŽ“ Practice for oral examinations. The AI examiner asks probing questions, evaluates your answers, and pushes you to demonstrate deeper understanding.</div>')
1037
+
1038
+ with gr.Row():
1039
+ viva_topic = gr.Dropdown(
1040
+ choices=TOPIC_CHOICES,
1041
+ label="Viva Topic",
1042
+ allow_custom_value=True,
1043
+ value="RNA-seq: Differential Expression (DESeq2)"
1044
+ )
1045
+ viva_difficulty = gr.Radio(
1046
+ choices=DIFFICULTY_LEVELS,
1047
+ value="Intermediate",
1048
+ label="Exam Difficulty"
1049
+ )
1050
+
1051
+ gr.ChatInterface(
1052
+ fn=viva_respond,
1053
+ type="messages",
1054
+ additional_inputs=[
1055
+ viva_topic,
1056
+ viva_difficulty,
1057
+ ],
1058
+ additional_inputs_accordion=gr.Accordion("βš™οΈ Settings", open=False),
1059
+ examples=[
1060
+ "I'm ready for my viva. Please start with your first question.",
1061
+ "Can we focus on the statistical aspects of RNA-seq analysis?",
1062
+ "Ask me about variant calling and interpretation.",
1063
+ "Test my understanding of microbiome diversity analysis.",
1064
+ ],
1065
+ )
1066
+
1067
+ # ── Footer ────────────────────────────────────────────────────────
1068
+ gr.HTML("""
1069
+ <div style="text-align: center; padding: 20px; margin-top: 20px; border-top: 1px solid #e0e0e0; color: #666; font-size: 0.85em;">
1070
+ <p><strong>Bioinformatics with BB Tutor</strong> β€” Educational AI Assistant</p>
1071
+ <p>⚠️ For educational purposes only. Not for clinical use. Always verify critical information with primary sources.</p>
1072
+ <p>Domains: RNA-seq Β· Exome Β· Genome Β· Microbiome Β· Variants Β· Molecular Genetics Β· scRNA-seq Β· ATAC-seq Β· ChIP-seq Β· Methylation Β· Small RNA Β· Targeted Panels Β· Long-read Β· Spatial Transcriptomics Β· Multi-omics</p>
1073
+ </div>
1074
+ """)
1075
+
1076
+ return demo
1077
+
1078
+
1079
+ # ============================================================================
1080
+ # LAUNCH
1081
+ # ============================================================================
1082
+
1083
if __name__ == "__main__":
    # Script entry point: construct the UI once, then start serving it.
    demo = build_app()

    # Launch settings are gathered in one place so they are easy to audit.
    # NOTE(review): "0.0.0.0" presumably exposes the server on every network
    # interface (as container hosts typically require) - confirm against the
    # deployment target before changing it.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)