llexieguo commited on
Commit
733c2e2
·
1 Parent(s): 1b60df3

Initial PDF tutor app

Browse files
(0) 70113_Generative_AI_README_for_Coursework.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
.gitignore CHANGED
@@ -205,3 +205,5 @@ cython_debug/
205
  marimo/_static/
206
  marimo/_lsp/
207
  __marimo__/
 
 
 
205
  marimo/_static/
206
  marimo/_lsp/
207
  __marimo__/
208
+ tmp_outputs/
209
+ .DS_Store
app.py ADDED
@@ -0,0 +1,1625 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import json
3
+ import math
4
+ import os
5
+ import re
6
+ import uuid
7
+ import wave
8
+ from dataclasses import dataclass, asdict
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ import gradio as gr
13
+ import requests
14
+
15
+ try:
16
+ import spaces # type: ignore
17
+ except Exception:
18
+ class _SpacesFallback:
19
+ @staticmethod
20
+ def GPU(fn):
21
+ return fn
22
+
23
+ spaces = _SpacesFallback() # type: ignore
24
+
25
+ try:
26
+ from pypdf import PdfReader
27
+ except Exception: # pragma: no cover
28
+ PdfReader = None # type: ignore
29
+
30
+ try:
31
+ import pypdfium2 as pdfium
32
+ except Exception: # pragma: no cover
33
+ pdfium = None # type: ignore
34
+
35
+
36
# Resolve paths relative to this file so the app works regardless of CWD.
APP_DIR = Path(__file__).parent.resolve()
# Scratch directory for rendered PDF pages and synthesized audio
# (git-ignored; see tmp_outputs/ in .gitignore).
TMP_DIR = APP_DIR / "tmp_outputs"
TMP_DIR.mkdir(exist_ok=True)
39
+
40
+ def _load_dotenv_file(dotenv_path: Path) -> None:
41
+ if not dotenv_path.exists():
42
+ return
43
+ for raw_line in dotenv_path.read_text(encoding="utf-8").splitlines():
44
+ line = raw_line.strip()
45
+ if not line or line.startswith("#") or "=" not in line:
46
+ continue
47
+ key, value = line.split("=", 1)
48
+ key = key.strip()
49
+ value = value.strip().strip('"').strip("'")
50
+ if key and key not in os.environ:
51
+ os.environ[key] = value
52
+
53
+
54
# Load local secrets before reading any configuration below.
_load_dotenv_file(APP_DIR / ".env")

# NOTE(review): "API_UR" looks like a typo kept for backward compatibility
# (the error message in _require_api_url mentions both) — confirm before removing.
API_URL = os.getenv("API_URL") or os.getenv("API_UR", "")
API_KEY = os.getenv("API_KEY", "")
# Mock mode defaults ON unless both API_URL and API_KEY are configured.
USE_MOCK_MODELS = os.getenv("USE_MOCK_MODELS", "0" if (API_URL and API_KEY) else "1") == "1"
# Model / voice identifiers for the remote VL and TTS services.
CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "qwen-vl-max")
TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
TTS_FORMAT = os.getenv("QWEN_TTS_FORMAT", "wav")
# Request timeout (seconds) applied to every remote HTTP call.
API_TIMEOUT_SEC = int(os.getenv("API_TIMEOUT_SEC", "180"))
# How many PDF pages are rendered to images for the VL model, and at what scale.
QWEN_VL_MAX_PAGES = int(os.getenv("QWEN_VL_MAX_PAGES", "4"))
QWEN_VL_RENDER_SCALE = float(os.getenv("QWEN_VL_RENDER_SCALE", "1.5"))
# Token budgets: lecture generation vs. the (longer) MCQ JSON generation.
QWEN_VL_MAX_NEW_TOKENS = int(os.getenv("QWEN_VL_MAX_NEW_TOKENS", "800"))
QWEN_VL_MCQ_MAX_NEW_TOKENS = int(os.getenv("QWEN_VL_MCQ_MAX_NEW_TOKENS", "1800"))
69
+
70
# Built-in Chinese prompt for turning a paper into a classroom lecture.
# Rendered via render_prompt_template(), which substitutes {document} literally.
DEFAULT_LECTURE_PROMPT_TEMPLATE = """
你是一名课程助教。请阅读用户上传的论文内容,并输出一段中文讲解,要求:
1. 先说明论文要解决的问题和背景;
2. 再解释核心方法(按步骤/模块);
3. 再总结实验结果或亮点;
4. 最后给出局限性与适用场景;
5. 语言清晰,适合课堂讲解(约 400-700 字)。

论文内容(可能是节选):
{document}
""".strip()


# Built-in prompt asking for 5 multiple-choice questions as strict JSON.
# Literal braces are doubled ({{ }}) because this template is plain text;
# only {document} is replaced.
DEFAULT_MCQ_PROMPT_TEMPLATE = """
请基于下面论文内容,生成 5 道中文单选题(MCQ),用于课堂测验。
严格输出 JSON(不要 markdown 代码块),格式如下:
{{
  "questions": [
    {{
      "question": "...",
      "options": ["A选项", "B选项", "C选项", "D选项"],
      "answer": "A",
      "explanation": "..."
    }}
  ]
}}

要求:
1. 共 5 题;
2. 每题 4 个选项;
3. answer 必须是 A/B/C/D;
4. 解析要说明为什么正确,以及常见误区;
5. 题目应覆盖问题背景、方法、实验/结果、局限性。

论文内容(可能是节选):
{document}
""".strip()


# Stricter retry prompt used after a JSON parse failure: compact JSON only.
DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE = """
基于以下论文内容生成 5 道中文单选题。只输出合法 JSON,不要任何解释,不要 markdown。

限制:
1. 必须是紧凑 JSON(单行也可以);
2. 共 5 题;
3. 每题 question、options(4项)、answer(A/B/C/D)、explanation;
4. 解析简短(1-2句),避免过长;
5. 如果不确定,仍按论文内容出题,不要输出额外文字。

输出格式:
{{"questions":[{{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}}]}}

论文内容:
{document}
""".strip()


# Per-character prompt/avatar overrides live here (one subdirectory each).
CHARACTERS_DIR = APP_DIR / "characters"
128
+
129
+
130
+ def _read_text_if_exists(path: Path, fallback: str) -> str:
131
+ try:
132
+ return path.read_text(encoding="utf-8").strip()
133
+ except Exception:
134
+ return fallback
135
+
136
+
137
def render_prompt_template(template: str, document: str) -> str:
    """Substitute the literal ``{document}`` placeholder.

    Deliberately avoids ``str.format`` because character prompt files may
    contain JSON braces that would break format-field parsing.
    """
    rendered = str(template)
    return rendered.replace("{document}", document)
140
+
141
+
142
def load_character_configs() -> Dict[str, Any]:
    """Scan CHARACTERS_DIR for per-character configs; fall back to a built-in one.

    Each character is a subdirectory with an optional ``meta.json`` plus
    optional prompt-template text files. Missing or unparsable pieces fall
    back to the built-in defaults, so a bare directory is a valid character.
    Returns a mapping of character id -> config dict.
    """
    configs: Dict[str, Dict[str, Any]] = {}
    if CHARACTERS_DIR.exists():
        for d in sorted(CHARACTERS_DIR.iterdir()):
            if not d.is_dir():
                continue
            meta_path = d / "meta.json"
            meta: Dict[str, Any] = {}
            if meta_path.exists():
                try:
                    parsed = json.loads(meta_path.read_text(encoding="utf-8"))
                    # Only accept a JSON object; any other shape is ignored.
                    if isinstance(parsed, dict):
                        meta = parsed
                except Exception:
                    meta = {}
            cid = str(meta.get("id") or d.name)
            # If two folders declare the same id, fall back to the folder name
            # so neither silently overwrites the other.
            if cid in configs:
                cid = d.name
            avatar_rel = str(meta.get("avatar", "avatar.jpg"))
            config: Dict[str, Any] = {
                "id": cid,
                "display_name": str(meta.get("display_name", d.name)),
                "tagline": str(meta.get("tagline", "Research paper explainer · MCQ coach")),
                "byline": str(meta.get("byline", "By @local-demo")),
                "chat_label": str(meta.get("chat_label", meta.get("display_name", d.name))),
                "chat_mode": str(meta.get("chat_mode", "paper mode")),
                "avatar_path": str((d / avatar_rel).resolve()),
                # Prompt templates: per-character files override the defaults.
                "lecture_prompt_template": _read_text_if_exists(
                    d / str(meta.get("lecture_prompt_file", "lecture_prompt.txt")),
                    DEFAULT_LECTURE_PROMPT_TEMPLATE,
                ),
                "mcq_prompt_template": _read_text_if_exists(
                    d / str(meta.get("mcq_prompt_file", "mcq_prompt.txt")),
                    DEFAULT_MCQ_PROMPT_TEMPLATE,
                ),
                "mcq_retry_prompt_template": _read_text_if_exists(
                    d / str(meta.get("mcq_retry_prompt_file", "mcq_retry_prompt.txt")),
                    DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
                ),
            }
            configs[cid] = config

    if not configs:
        # Fallback to a built-in default character if no folder config exists.
        configs["default"] = {
            "id": "default",
            "display_name": "PDF Paper Tutor",
            "tagline": "Research paper explainer · MCQ coach",
            "byline": "By @local-demo",
            "chat_label": "PDF Paper Tutor",
            "chat_mode": "paper mode",
            "avatar_path": str((APP_DIR / "avatar.jpg").resolve()) if (APP_DIR / "avatar.jpg").exists() else "",
            "lecture_prompt_template": DEFAULT_LECTURE_PROMPT_TEMPLATE,
            "mcq_prompt_template": DEFAULT_MCQ_PROMPT_TEMPLATE,
            "mcq_retry_prompt_template": DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE,
        }
    return configs
199
+
200
+
201
# Load once at import time; the first entry (sorted folder order, or the
# built-in fallback) becomes the default character.
CHARACTER_CONFIGS = load_character_configs()
DEFAULT_CHARACTER_ID = next(iter(CHARACTER_CONFIGS.keys()))
203
+
204
+
205
def get_character_config(character_id: Optional[str]) -> Dict[str, Any]:
    """Look up a character config by id, falling back to the default character."""
    fallback = CHARACTER_CONFIGS[DEFAULT_CHARACTER_ID]
    if not character_id:
        return fallback
    return CHARACTER_CONFIGS.get(character_id, fallback)
209
+
210
+
211
@dataclass
class MCQItem:
    """One multiple-choice quiz question with exactly four options."""

    question: str
    options: List[str]
    answer: str  # correct option letter: "A" | "B" | "C" | "D"
    explanation: str

    def to_display_choices(self) -> List[str]:
        """Render options as radio labels: 'A. ...', 'B. ...', etc."""
        labels = "ABCD"
        return [f"{labels[idx]}. {text}" for idx, text in enumerate(self.options)]

    def correct_choice_display(self) -> str:
        """Return the rendered label of the correct option."""
        position = "ABCD".index(self.answer)
        return self.to_display_choices()[position]
225
+
226
+
227
def new_session_state() -> Dict[str, Any]:
    """Create a fresh per-session state dict with all fields at their defaults."""
    fresh: Dict[str, Any] = dict(
        lecture_text="",
        lecture_audio_path=None,
        explanation_audio_path=None,
        last_explanation_tts_text="",
        pdf_path=None,
        pdf_excerpt="",
        character_id=DEFAULT_CHARACTER_ID,
        exam_character_id=None,
        current_page="explain",
        mcqs=[],
        current_index=0,
        score=0,
        awaiting_next_after_wrong=False,
        completed=False,
        status="Idle",
    )
    return fresh
245
+
246
+
247
def strip_code_fence(text: str) -> str:
    """Remove a surrounding markdown ``` fence (with optional language tag)."""
    stripped = text.strip()
    if not stripped.startswith("```"):
        return stripped
    without_open = re.sub(r"^```[a-zA-Z0-9_-]*\n?", "", stripped)
    without_close = re.sub(r"\n?```$", "", without_open)
    return without_close.strip()
253
+
254
+
255
def extract_pdf_text(pdf_path: str, max_chars: int = 16000) -> str:
    """Extract up to ``max_chars`` of text from a PDF, tagged with page markers.

    Returns a human-readable error string (not an exception) when pypdf is
    missing or the PDF yields no text, so callers can surface it directly.
    """
    if PdfReader is None:
        return (
            "PDF text extraction library (pypdf) is unavailable. "
            "Please install pypdf or switch to a Vision-based PDF reader implementation."
        )

    reader = PdfReader(pdf_path)
    chunks: List[str] = []
    total = 0
    for page_idx, page in enumerate(reader.pages, start=1):
        try:
            text = page.extract_text() or ""
        except Exception:
            # Some pages fail extraction (fonts/encoding); treat as empty.
            text = ""
        if text.strip():
            chunk = f"[Page {page_idx}]\n{text.strip()}\n"
            chunks.append(chunk)
            total += len(chunk)
            # Stop reading once the character budget is reached.
            if total >= max_chars:
                break

    if not chunks:
        return (
            "No extractable text was found in the PDF. "
            "For scanned PDFs, implement page-image rendering and pass images to Qwen-VL."
        )
    # Join and hard-cap: the last chunk may have overshot the budget.
    return "\n".join(chunks)[:max_chars]
283
+
284
+
285
def write_tone_wav(text: str, out_path: str, seconds: float = 2.0, sample_rate: int = 16000) -> str:
    """Mock-TTS fallback: write a short mono 16-bit sine-tone WAV.

    Lets the UI flow be exercised without a real TTS backend. The pitch is
    derived from ``text`` so different inputs sound different. Duration is
    clamped to [1.0, 8.0] seconds. Returns ``out_path``.
    """
    freq = 440 + (len(text) % 220)  # text-dependent pitch for variety
    amplitude = 9000
    frames = int(sample_rate * max(1.0, min(seconds, 8.0)))
    # Build the PCM payload once instead of one writeframesraw() call per
    # sample: identical bytes, but thousands fewer Python-level I/O calls.
    angular = 2 * math.pi * freq
    pcm = b"".join(
        int(amplitude * math.sin(angular * (i / sample_rate))).to_bytes(2, byteorder="little", signed=True)
        for i in range(frames)
    )
    with wave.open(out_path, "wb") as wf:
        wf.setnchannels(1)  # mono
        wf.setsampwidth(2)  # 16-bit samples
        wf.setframerate(sample_rate)
        wf.writeframes(pcm)
    return out_path
298
+
299
+
300
def render_pdf_pages_for_vl(pdf_path: str, max_pages: int, scale: float) -> List[str]:
    """Render the first ``max_pages`` PDF pages to PNG files for the VL model.

    Pages are written into a unique subdirectory of TMP_DIR and the PNG
    paths returned in page order. Raises RuntimeError when pypdfium2 is
    unavailable, the PDF is empty, or rendering produced nothing.
    """
    if pdfium is None:
        raise RuntimeError("pypdfium2 is required to render PDF pages for Qwen3-VL.")
    doc = pdfium.PdfDocument(pdf_path)
    page_count = len(doc)
    if page_count == 0:
        raise RuntimeError("Uploaded PDF has no pages.")

    # Unique directory per call so concurrent sessions never collide.
    render_dir = TMP_DIR / f"pdf_pages_{uuid.uuid4().hex}"
    render_dir.mkdir(exist_ok=True)

    paths: List[str] = []
    try:
        for i in range(min(page_count, max_pages)):
            page = doc[i]
            pil = page.render(scale=scale).to_pil()
            pil = pil.convert("RGB")
            out_path = render_dir / f"page_{i+1:02d}.png"
            pil.save(out_path, format="PNG")
            paths.append(str(out_path))
            # Free per-page native resources eagerly; guarded because older
            # pypdfium2 page objects may not expose close().
            close_fn = getattr(page, "close", None)
            if callable(close_fn):
                close_fn()
    finally:
        # Always release the document handle, even on a rendering error.
        close_fn = getattr(doc, "close", None)
        if callable(close_fn):
            close_fn()

    if not paths:
        raise RuntimeError("Failed to render PDF pages for Qwen3-VL.")
    return paths
331
+
332
+
333
def image_file_to_data_url(image_path: str) -> str:
    """Embed an image file as a base64 ``data:image/png`` URL for API payloads."""
    encoded = base64.b64encode(Path(image_path).read_bytes())
    return "data:image/png;base64," + encoded.decode("ascii")
337
+
338
+
339
def _api_headers() -> Dict[str, str]:
    """Build bearer-auth JSON headers; fail fast when no API key is configured."""
    if not API_KEY:
        raise RuntimeError("Missing API_KEY. Put it in .env or environment variables.")
    headers = {"Content-Type": "application/json"}
    headers["Authorization"] = f"Bearer {API_KEY}"
    return headers
346
+
347
+
348
def _require_api_url() -> str:
    """Return the configured API base URL without a trailing slash, or raise."""
    if API_URL:
        return API_URL.rstrip("/")
    raise RuntimeError("Missing API_URL/API_UR. Put it in .env or environment variables.")
352
+
353
+
354
def _dashscope_tts_url() -> str:
    """Derive the DashScope native TTS endpoint from the configured base URL.

    Strips an OpenAI-compatible suffix (``/compatible-mode/...`` or ``/v1``)
    to recover the service root, then appends the multimodal-generation path.
    """
    suffix = "/api/v1/services/aigc/multimodal-generation/generation"
    base = _require_api_url()
    if "/compatible-mode/" in base:
        root = base.split("/compatible-mode/", 1)[0]
    else:
        root = base[:-3] if base.endswith("/v1") else base
    return root + suffix
363
+
364
+
365
+ def _save_binary_audio(audio_bytes: bytes, out_path: str) -> str:
366
+ Path(out_path).write_bytes(audio_bytes)
367
+ return out_path
368
+
369
+
370
def split_text_for_tts(text: str, max_len: int = 480) -> List[str]:
    """Split text into chunks of at most ``max_len`` characters for TTS.

    Whitespace runs are collapsed first. Chunks are packed from sentence-like
    pieces (split after CJK/ASCII terminators); any single oversized piece is
    hard-split at ``max_len`` boundaries.
    """
    normalized = re.sub(r"\s+", " ", (text or "")).strip()
    if not normalized:
        return []
    if len(normalized) <= max_len:
        return [normalized]

    sentences = re.split(r"(?<=[。!?!?;;::\.])\s*", normalized)
    out: List[str] = []
    pending = ""
    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue
        if len(sentence) > max_len:
            # Oversized sentence: flush the buffer, then hard-split it.
            if pending:
                out.append(pending)
                pending = ""
            out.extend(sentence[pos:pos + max_len] for pos in range(0, len(sentence), max_len))
            continue
        merged = f"{pending} {sentence}".strip() if pending else sentence
        if len(merged) <= max_len:
            pending = merged
        else:
            out.append(pending)
            pending = sentence
    if pending:
        out.append(pending)
    return out
401
+
402
+
403
def concat_wav_files(wav_paths: List[str], out_path: str) -> str:
    """Join several same-format WAV chunks into one file at ``out_path``.

    Raises RuntimeError when the list is empty or the chunks disagree on
    channels / sample width / frame rate. Returns ``out_path``.
    """
    if not wav_paths:
        raise RuntimeError("No WAV chunks to concatenate.")
    if len(wav_paths) == 1:
        # Single chunk: just copy the bytes to the requested location.
        Path(out_path).write_bytes(Path(wav_paths[0]).read_bytes())
        return out_path

    fmt = None  # (channels, sample width, frame rate) of the first chunk
    payload: List[bytes] = []
    for chunk_path in wav_paths:
        with wave.open(str(chunk_path), "rb") as src:
            this_fmt = (src.getnchannels(), src.getsampwidth(), src.getframerate())
            if fmt is None:
                fmt = this_fmt
            elif this_fmt != fmt:
                raise RuntimeError("TTS WAV chunks have mismatched formats and cannot be concatenated.")
            payload.append(src.readframes(src.getnframes()))

    assert fmt is not None
    channels, width, rate = fmt
    with wave.open(out_path, "wb") as dst:
        dst.setnchannels(channels)
        dst.setsampwidth(width)
        dst.setframerate(rate)
        for block in payload:
            dst.writeframes(block)
    return out_path
428
+
429
+
430
class QwenPipelineEngine:
    """Gradio-facing backend for: PDF -> lecture text -> MCQs -> TTS audio.

    Ships with a mock mode by default so the workflow is runnable immediately.
    When USE_MOCK_MODELS=0, it calls remote APIs:
      - VL: OpenAI-compatible /chat/completions (works with DashScope
        compatible-mode and vLLM-style APIs)
      - TTS: OpenAI-compatible /audio/speech first, then the DashScope
        multimodal-generation API (which returns an audio URL) as fallback.
    """

    def __init__(self) -> None:
        # Captured at construction; flipping the env var later has no effect.
        self.mock_mode = USE_MOCK_MODELS
        self.vl_loaded = False
        self.tts_loaded = False
        # pdf path -> rendered page-image paths, so repeated calls on the
        # same PDF skip re-rendering.
        self._pdf_page_cache: Dict[str, List[str]] = {}

    def ensure_vl_loaded(self) -> None:
        """Validate VL API configuration once; no-op in mock mode."""
        if self.vl_loaded:
            return
        if self.mock_mode:
            self.vl_loaded = True
            return
        _require_api_url()
        if not API_KEY:
            raise RuntimeError("Missing API_KEY for VL API calls.")
        self.vl_loaded = True

    def ensure_tts_loaded(self) -> None:
        """Validate TTS API configuration once; no-op in mock mode."""
        if self.tts_loaded:
            return
        if self.mock_mode:
            self.tts_loaded = True
            return
        _require_api_url()
        if not API_KEY:
            raise RuntimeError("Missing API_KEY for TTS API calls.")
        self.tts_loaded = True

    def _mock_generate_lecture(self, pdf_excerpt: str) -> str:
        """Return a canned Chinese lecture embedding a 1000-char excerpt."""
        excerpt = re.sub(r"\s+", " ", pdf_excerpt).strip()
        excerpt = excerpt[:1000]
        return (
            "这是一段基于论文内容的课堂讲解(Mock 模式)。\n\n"
            "1. 论文问题与背景:该工作试图解决一个具体任务中的效率/性能/泛化问题,核心动机通常是现有方法在成本、准确性或可解释性方面存在不足。\n"
            "2. 核心方法:作者提出新的模型结构、训练策略或推理流程,并通过若干模块协同完成任务。\n"
            "3. 实验与结果:论文通常会在标准数据集上与基线比较,并报告性能提升、效率改善或更稳定的表现。\n"
            "4. 局限与适用场景:方法可能依赖特定数据分布、计算资源或任务设定,迁移到新领域需要额外验证。\n\n"
            f"论文节选(用于生成讲解): {excerpt}"
        )

    def _mock_generate_mcqs(self, lecture_text: str) -> List[MCQItem]:
        """Return five fixed questions; ``lecture_text`` is ignored in mock mode."""
        base_questions = [
            MCQItem(
                question="这篇论文最主要想解决的问题通常属于下列哪一类?",
                options=["现有方法存在性能或效率瓶颈", "如何设计数据库索引", "如何搭建前端页面", "如何压缩视频文件"],
                answer="A",
                explanation="课程论文阅读类任务通常围绕已有方法不足展开,作者提出新方法来提升性能、效率或鲁棒性。",
            ),
            MCQItem(
                question="在讲解论文方法时,最合理的组织方式是什么?",
                options=["按模块或步骤解释输入到输出流程", "只列出参考文献", "只展示实验表格不解释方法", "只讲结论不讲背景"],
                answer="A",
                explanation="课堂讲解需要结构化地说明方法流程,这样听众才能理解论文如何从问题走到解法。",
            ),
            MCQItem(
                question="生成选择题时,为什么需要同时给出答案和解析?",
                options=["便于交互反馈与纠错教学", "只是为了让 JSON 更长", "因为 Gradio 要求必须有解析", "为了减少题目数量"],
                answer="A",
                explanation="答案和解析是教学闭环的一部分,错误时给出解析能帮助用户理解常见误区。",
            ),
            MCQItem(
                question="如果论文很长,一次性输入模型的风险是什么?",
                options=["上下文超长导致成本高、信息丢失或失败", "模型会自动变得更准确", "TTS 音频会变短", "PDF 文件会损坏"],
                answer="A",
                explanation="长文档通常需要分块总结再汇总,避免超出上下文窗口并降低生成质量波动。",
            ),
            MCQItem(
                question="在这个 Demo 流程中,Qwen TTS 的作用是什么?",
                options=["把讲解与错题解析转成语音输出", "把 PDF 转成图片", "训练 Qwen3-VL-8B", "生成新的选择题答案"],
                answer="A",
                explanation="TTS 用于将文本讲解/解析语音化,提高交互演示效果和可访问性。",
            ),
        ]
        return base_questions

    def _get_pdf_page_images(self, pdf_path: str) -> List[str]:
        """Render (or reuse cached) page images for a PDF, keyed by resolved path."""
        cache_key = str(Path(pdf_path).resolve())
        cached = self._pdf_page_cache.get(cache_key)
        # Re-render if any cached file was cleaned up from tmp_outputs/.
        if cached and all(Path(p).exists() for p in cached):
            return cached
        page_paths = render_pdf_pages_for_vl(
            pdf_path,
            max_pages=QWEN_VL_MAX_PAGES,
            scale=QWEN_VL_RENDER_SCALE,
        )
        self._pdf_page_cache[cache_key] = page_paths
        return page_paths

    def _chat_completions(self, messages: List[Dict[str, Any]], max_tokens: int) -> str:
        """POST to the OpenAI-compatible /chat/completions endpoint; return text.

        Handles both string content and the list-of-parts content shape some
        servers return. Raises RuntimeError on HTTP errors or empty choices.
        """
        url = f"{_require_api_url()}/chat/completions"
        payload: Dict[str, Any] = {
            "model": CHAT_MODEL_ID,
            "messages": messages,
            "max_tokens": max_tokens,
            "stream": False,
        }
        resp = requests.post(url, headers=_api_headers(), json=payload, timeout=API_TIMEOUT_SEC)
        if resp.status_code >= 400:
            raise RuntimeError(f"VL API error {resp.status_code}: {resp.text[:1000]}")
        data = resp.json()
        choices = data.get("choices") or []
        if not choices:
            raise RuntimeError(f"VL API returned no choices: {data}")
        content = choices[0].get("message", {}).get("content", "")
        if isinstance(content, str):
            return content.strip()
        if isinstance(content, list):
            # Multi-part content: keep only textual parts.
            parts: List[str] = []
            for item in content:
                if isinstance(item, dict) and item.get("type") in {"text", "output_text"}:
                    parts.append(str(item.get("text") or item.get("content") or ""))
            return "\n".join([p for p in parts if p]).strip()
        return str(content).strip()

    def _real_generate_text_from_pdf(self, pdf_path: str, prompt: str, max_tokens: Optional[int] = None) -> str:
        """Send rendered PDF page images plus ``prompt`` to the VL model."""
        page_image_paths = self._get_pdf_page_images(pdf_path)
        content: List[Dict[str, Any]] = []
        # Images first, then the instruction text, in one user message.
        for p in page_image_paths:
            content.append({"type": "image_url", "image_url": {"url": image_file_to_data_url(p)}})
        content.append({"type": "text", "text": prompt})
        messages = [{"role": "user", "content": content}]
        return self._chat_completions(messages, max_tokens=max_tokens or QWEN_VL_MAX_NEW_TOKENS)

    def _real_tts_single(self, text: str, out_path: str) -> str:
        """Synthesize one chunk of text to ``out_path``.

        Tries the OpenAI-compatible /audio/speech endpoint first (binary
        response); on failure falls back to the DashScope endpoint, which
        returns JSON containing a downloadable audio URL.
        """
        if not text.strip():
            # Nothing to say: emit a placeholder tone instead of failing.
            return write_tone_wav("empty", out_path)
        openai_url = f"{_require_api_url()}/audio/speech"
        openai_payload = {
            "model": TTS_MODEL_ID,
            "input": text,
            "voice": TTS_SPEAKER,
            "format": TTS_FORMAT,
        }
        openai_resp = requests.post(
            openai_url,
            headers=_api_headers(),
            json=openai_payload,
            timeout=API_TIMEOUT_SEC,
        )
        content_type = openai_resp.headers.get("content-type", "")
        # A non-JSON 2xx/3xx body is assumed to be the audio bytes themselves.
        if openai_resp.status_code < 400 and "application/json" not in content_type.lower():
            return _save_binary_audio(openai_resp.content, out_path)

        # DashScope fallback: multimodal generation returns an audio URL in JSON.
        payload = {
            "model": TTS_MODEL_ID,
            "input": {"text": text},
            "parameters": {"voice": TTS_SPEAKER, "format": TTS_FORMAT},
        }
        resp = requests.post(
            _dashscope_tts_url(),
            headers=_api_headers(),
            json=payload,
            timeout=API_TIMEOUT_SEC,
        )
        if resp.status_code >= 400:
            # Report both attempts so the failure is diagnosable.
            err1 = openai_resp.text[:500] if openai_resp.text else ""
            err2 = resp.text[:500] if resp.text else ""
            raise RuntimeError(
                f"TTS API failed. openai-compatible: {openai_resp.status_code} {err1}; "
                f"dashscope: {resp.status_code} {err2}"
            )
        data = resp.json()
        # Two known response shapes: output.audio.url and output.audio_url.
        audio_url = (
            (((data.get("output") or {}).get("audio") or {}).get("url"))
            or (((data.get("output") or {}).get("audio_url")))
        )
        if not audio_url:
            raise RuntimeError(f"TTS API returned no audio URL: {data}")
        audio_resp = requests.get(audio_url, timeout=API_TIMEOUT_SEC)
        if audio_resp.status_code >= 400:
            raise RuntimeError(f"Failed to download TTS audio {audio_resp.status_code}: {audio_resp.text[:500]}")
        return _save_binary_audio(audio_resp.content, out_path)

    def _real_tts(self, text: str, out_path: str) -> str:
        """Synthesize arbitrary-length text: chunk, synthesize, concatenate.

        NOTE(review): concatenation assumes every chunk comes back as WAV
        with identical parameters — concat_wav_files enforces this.
        """
        chunks = split_text_for_tts(text, max_len=480)
        if not chunks:
            return write_tone_wav("empty", out_path)
        if len(chunks) == 1:
            return self._real_tts_single(chunks[0], out_path)

        chunk_paths: List[str] = []
        for idx, chunk in enumerate(chunks, start=1):
            chunk_path = str(TMP_DIR / f"tts_chunk_{idx}_{uuid.uuid4().hex}.wav")
            chunk_paths.append(self._real_tts_single(chunk, chunk_path))
        return concat_wav_files(chunk_paths, out_path)

    @spaces.GPU
    def build_lesson_and_quiz(self, pdf_path: str, character_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """One-shot pipeline: lecture text + 5 MCQs from a PDF.

        Returns {"lecture_text", "mcqs" (list of dicts), "pdf_excerpt"}.
        On an MCQ JSON parse failure, retries once with the stricter
        retry prompt before giving up.
        """
        self.ensure_vl_loaded()
        pdf_excerpt = extract_pdf_text(pdf_path)
        cfg = character_cfg or get_character_config(None)
        lecture_template = cfg.get("lecture_prompt_template", DEFAULT_LECTURE_PROMPT_TEMPLATE)
        mcq_template = cfg.get("mcq_prompt_template", DEFAULT_MCQ_PROMPT_TEMPLATE)
        mcq_retry_template = cfg.get("mcq_retry_prompt_template", DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE)

        if self.mock_mode:
            lecture_text = self._mock_generate_lecture(pdf_excerpt)
            mcqs = self._mock_generate_mcqs(lecture_text)
        else:
            lecture_prompt = render_prompt_template(str(lecture_template), pdf_excerpt)
            lecture_text = self._real_generate_text_from_pdf(pdf_path, lecture_prompt, max_tokens=QWEN_VL_MAX_NEW_TOKENS)
            quiz_prompt = render_prompt_template(str(mcq_template), pdf_excerpt)
            raw_mcq_json = self._real_generate_text_from_pdf(pdf_path, quiz_prompt, max_tokens=QWEN_VL_MCQ_MAX_NEW_TOKENS)
            try:
                mcqs = parse_mcq_json(raw_mcq_json)
            except json.JSONDecodeError:
                # One retry with the compact-JSON prompt.
                retry_prompt = render_prompt_template(str(mcq_retry_template), pdf_excerpt)
                retry_raw = self._real_generate_text_from_pdf(
                    pdf_path,
                    retry_prompt,
                    max_tokens=QWEN_VL_MCQ_MAX_NEW_TOKENS,
                )
                mcqs = parse_mcq_json(retry_raw)

        return {
            "lecture_text": lecture_text,
            "mcqs": [asdict(q) for q in mcqs],
            "pdf_excerpt": pdf_excerpt,
        }

    @spaces.GPU
    def build_lecture(self, pdf_path: str, character_cfg: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Generate only the lecture text. Returns {"lecture_text", "pdf_excerpt"}."""
        self.ensure_vl_loaded()
        pdf_excerpt = extract_pdf_text(pdf_path)
        cfg = character_cfg or get_character_config(None)
        lecture_template = cfg.get("lecture_prompt_template", DEFAULT_LECTURE_PROMPT_TEMPLATE)

        if self.mock_mode:
            lecture_text = self._mock_generate_lecture(pdf_excerpt)
        else:
            lecture_prompt = render_prompt_template(str(lecture_template), pdf_excerpt)
            lecture_text = self._real_generate_text_from_pdf(pdf_path, lecture_prompt, max_tokens=QWEN_VL_MAX_NEW_TOKENS)

        return {
            "lecture_text": lecture_text,
            "pdf_excerpt": pdf_excerpt,
        }

    @spaces.GPU
    def build_mcqs(self, pdf_path: str, pdf_excerpt: str, character_cfg: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
        """Generate only the 5 MCQs (as dicts), with one JSON-parse retry."""
        self.ensure_vl_loaded()
        cfg = character_cfg or get_character_config(None)
        mcq_template = cfg.get("mcq_prompt_template", DEFAULT_MCQ_PROMPT_TEMPLATE)
        mcq_retry_template = cfg.get("mcq_retry_prompt_template", DEFAULT_MCQ_RETRY_PROMPT_TEMPLATE)

        if self.mock_mode:
            mcqs = self._mock_generate_mcqs(pdf_excerpt)
            return [asdict(q) for q in mcqs]

        quiz_prompt = render_prompt_template(str(mcq_template), pdf_excerpt)
        raw_mcq_json = self._real_generate_text_from_pdf(pdf_path, quiz_prompt, max_tokens=QWEN_VL_MCQ_MAX_NEW_TOKENS)
        try:
            mcqs = parse_mcq_json(raw_mcq_json)
        except json.JSONDecodeError:
            # One retry with the compact-JSON prompt.
            retry_prompt = render_prompt_template(str(mcq_retry_template), pdf_excerpt)
            retry_raw = self._real_generate_text_from_pdf(
                pdf_path,
                retry_prompt,
                max_tokens=QWEN_VL_MCQ_MAX_NEW_TOKENS,
            )
            mcqs = parse_mcq_json(retry_raw)
        return [asdict(q) for q in mcqs]

    @spaces.GPU
    def synthesize_tts(self, text: str, name_prefix: str = "audio") -> str:
        """Synthesize ``text`` to a uniquely-named WAV in TMP_DIR; return its path."""
        self.ensure_tts_loaded()
        out_path = str(TMP_DIR / f"{name_prefix}_{uuid.uuid4().hex}.wav")
        if self.mock_mode:
            return write_tone_wav(text, out_path)
        return self._real_tts(text, out_path)
712
+
713
+
714
def parse_mcq_json(raw: str) -> List[MCQItem]:
    """Parse a model response into exactly five validated MCQItem objects.

    Tolerates markdown code fences and prose around the JSON object.
    Raises json.JSONDecodeError when no JSON object can be decoded, and
    ValueError when the payload does not yield exactly 5 well-formed
    questions (4 options, answer in A-D, non-empty question/explanation).

    NOTE(review): callers retry only on json.JSONDecodeError, so the
    ValueError paths here intentionally propagate as hard failures.
    """
    cleaned = strip_code_fence(raw)
    try:
        payload = json.loads(cleaned)
    except json.JSONDecodeError:
        # The model sometimes wraps JSON in prose; retry on the outermost {...}.
        start = cleaned.find("{")
        end = cleaned.rfind("}")
        if start == -1 or end <= start:
            raise
        payload = json.loads(cleaned[start:end + 1])
    # Guard payload shape: the old code assumed a dict and crashed with
    # AttributeError when the model returned a bare list or scalar.
    if not isinstance(payload, dict):
        raise ValueError(f"Expected a JSON object with 'questions', got {type(payload).__name__}")
    questions = payload.get("questions", [])
    if not isinstance(questions, list):
        raise ValueError("'questions' must be a list")
    parsed: List[MCQItem] = []
    for item in questions[:5]:
        if not isinstance(item, dict):
            continue  # skip malformed entries instead of crashing mid-parse
        q = str(item.get("question", "")).strip()
        options = [str(x).strip() for x in item.get("options", [])][:4]
        answer = str(item.get("answer", "")).strip().upper()
        explanation = str(item.get("explanation", "")).strip()
        if len(options) != 4:
            continue
        if answer not in {"A", "B", "C", "D"}:
            continue
        if not q or not explanation:
            continue
        parsed.append(MCQItem(question=q, options=options, answer=answer, explanation=explanation))
    if len(parsed) != 5:
        raise ValueError(f"Expected 5 MCQs, got {len(parsed)}")
    return parsed
742
+
743
+
744
# Module-level singleton shared by all Gradio callbacks below.
engine = QwenPipelineEngine()
745
+
746
+
747
def get_current_mcq(state: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """Return the MCQ dict at the session's current index, or None when out of range."""
    mcq_list = state.get("mcqs", [])
    position = state.get("current_index", 0)
    if 0 <= position < len(mcq_list):
        return mcq_list[position]
    return None
753
+
754
+
755
def format_question_block(state: Dict[str, Any]) -> str:
    """Markdown for the current question, the final score, or an empty-state note."""
    mcq = get_current_mcq(state)
    if mcq is not None:
        number = state["current_index"] + 1
        count = len(state["mcqs"])
        return f"### Question {number}/{count}\n\n{mcq['question']}"
    if not state.get("completed"):
        return "### No question loaded"
    total = len(state.get("mcqs", []))
    return f"### Quiz Completed\nScore: {state.get('score', 0)} / {total}"
765
+
766
+
767
def current_choices(state: Dict[str, Any]) -> List[str]:
    """Radio-button labels ('A. ...' .. 'D. ...') for the current question."""
    mcq = get_current_mcq(state)
    if mcq is None:
        return []
    labels = "ABCD"
    return [f"{labels[i]}. {opt}" for i, opt in enumerate(mcq["options"])]
773
+
774
+
775
def score_text(state: Dict[str, Any]) -> str:
    """One-line running score, e.g. 'Score: 2 / 5'."""
    question_total = len(state.get("mcqs", []))
    points = state.get("score", 0)
    return f"Score: {points} / {question_total}"
778
+
779
+
780
def reset_ui_from_state(
    state: Dict[str, Any],
    feedback: str = "",
    *,
    results_visible: bool = True,
    loading_visible: bool = False,
    loading_text: str = "正在生成中,请稍候...",
):
    """Project the session state onto every Gradio output component.

    Returns a 19-tuple whose order is the contract with the event wiring
    below — do not reorder. All visibility/interactivity flags are derived
    from the state dict so each yield repaints the whole UI consistently.
    """
    quiz_ready = bool(state.get("mcqs"))
    current_page = state.get("current_page", "explain")
    # The explain and exam pages are mutually exclusive; both hide while loading.
    show_explain_page = results_visible and current_page != "exam"
    show_exam_page = results_visible and current_page == "exam"
    next_visible = bool(state.get("awaiting_next_after_wrong"))
    submit_interactive = quiz_ready and not state.get("completed", False)
    # Lock the radio while a wrong answer's explanation is showing.
    radio_interactive = submit_interactive and not state.get("awaiting_next_after_wrong", False)
    lecture_tts_ready = bool(state.get("lecture_text"))
    explanation_tts_ready = bool(state.get("last_explanation_tts_text"))
    if state.get("completed"):
        # Finished quiz: freeze answering controls.
        next_visible = False
        radio_interactive = False
    return (
        state,
        gr.update(value=loading_text, visible=loading_visible),   # loading banner
        gr.update(visible=show_explain_page),                     # explain page group
        gr.update(visible=show_exam_page),                        # exam page group
        gr.update(),                                              # placeholder: unchanged component
        state.get("status", "Idle"),                              # status line
        state.get("lecture_text", ""),                            # lecture markdown
        state.get("lecture_audio_path", None),                    # lecture audio player
        gr.update(interactive=lecture_tts_ready),                 # lecture TTS button
        gr.update(visible=lecture_tts_ready, interactive=lecture_tts_ready),
        gr.update(value=format_question_block(state), visible=quiz_ready),  # question md
        gr.update(choices=current_choices(state), value=None, interactive=radio_interactive),
        score_text(state),                                        # score label
        feedback,                                                 # feedback markdown
        state.get("explanation_audio_path", None),                # explanation audio
        gr.update(visible=explanation_tts_ready, interactive=explanation_tts_ready),
        gr.update(visible=next_visible),                          # "next" button
        gr.update(interactive=submit_interactive),                # "submit" button
        gr.update(interactive=quiz_ready),                        # regenerate button
    )
821
+
822
+
823
def process_pdf(pdf_file: Optional[str], character_id: str, state: Dict[str, Any]):
    """Generator handler: build the lecture for an uploaded PDF.

    Starts a fresh session, first yields a loading view, then yields either
    the generated lecture or an error view. A generator is used (rather than
    a plain return) so Gradio can render the intermediate "generating" state.
    """
    # Every generation starts from a clean session; only the character is kept.
    state = new_session_state()
    state["character_id"] = character_id or DEFAULT_CHARACTER_ID
    if not pdf_file:
        state["status"] = "Please upload a PDF first."
        yield reset_ui_from_state(state, feedback="Upload a PDF to start.", results_visible=False, loading_visible=False)
        return

    state["status"] = "正在生成中..."
    # First yield: hide result pages and show the loading banner.
    yield reset_ui_from_state(
        state,
        feedback="正在读取论文并生成讲解与题目,请稍候...",
        results_visible=False,
        loading_visible=True,
        loading_text="正在生成中,请稍候...",
    )
    try:
        result = engine.build_lecture(pdf_file, get_character_config(state["character_id"]))
        lecture_text = result["lecture_text"]
        pdf_excerpt = result["pdf_excerpt"]

        state["lecture_text"] = lecture_text
        # Audio is synthesized lazily via the "Play Lecture Audio" button.
        state["lecture_audio_path"] = None
        state["explanation_audio_path"] = None
        state["last_explanation_tts_text"] = ""
        # Keep the PDF path/excerpt so the exam page can generate MCQs later.
        state["pdf_path"] = pdf_file
        state["pdf_excerpt"] = pdf_excerpt
        state["current_page"] = "explain"
        state["mcqs"] = []
        state["current_index"] = 0
        state["score"] = 0
        state["awaiting_next_after_wrong"] = False
        state["completed"] = False
        state["status"] = "Lecture generated."
        yield reset_ui_from_state(
            state,
            feedback="Lecture is ready. Click 'Play Lecture Audio' if needed, then press 'Exam' to generate MCQs.",
            results_visible=True,
            loading_visible=False,
        )
    except Exception as exc:
        # Surface the failure in both the lecture box and the feedback box.
        state["status"] = "Failed during generation."
        state["lecture_text"] = f"Error: {type(exc).__name__}: {exc}"
        state["current_page"] = "explain"
        yield reset_ui_from_state(
            state,
            feedback=f"Error: {type(exc).__name__}: {exc}",
            results_visible=True,
            loading_visible=False,
        )
873
+
874
+
875
def submit_answer(choice: Optional[str], state: Dict[str, Any]):
    """Grade the selected option for the current question and refresh the UI."""
    # Guard clauses: a quiz must be loaded, unfinished, and awaiting an answer.
    if not state.get("mcqs"):
        state["status"] = "No quiz loaded."
        return reset_ui_from_state(state, feedback="Load a PDF first.")
    if state.get("completed"):
        return reset_ui_from_state(state, feedback="Quiz already completed.")
    if state.get("awaiting_next_after_wrong"):
        return reset_ui_from_state(state, feedback="Click Next Question to continue.")
    if not choice:
        return reset_ui_from_state(state, feedback="Please select an option.")

    mcq = get_current_mcq(state)
    if mcq is None:
        state["status"] = "No current question."
        return reset_ui_from_state(state, feedback="No current question.")

    # Radio values look like "B. some option"; compare bare letters.
    picked = choice.split(".", 1)[0].strip().upper()
    expected = str(mcq["answer"]).upper()

    if picked == expected:
        state["score"] += 1
        state["last_explanation_tts_text"] = ""
        state["explanation_audio_path"] = None
        state["status"] = "Correct answer."
        on_last_question = state["current_index"] >= len(state["mcqs"]) - 1
        if on_last_question:
            state["completed"] = True
            state["status"] = "Quiz completed."
            return reset_ui_from_state(
                state,
                feedback=f"Correct. Quiz finished. Final score: {state['score']} / {len(state['mcqs'])}.",
            )
        state["current_index"] += 1
        return reset_ui_from_state(state, feedback="Correct. Moving to the next question.")

    # Wrong answer: show the correct option plus explanation and pause the quiz.
    answer_position = ["A", "B", "C", "D"].index(expected)
    shown_answer = f"{expected}. {mcq['options'][answer_position]}"
    explanation = mcq["explanation"]
    state["last_explanation_tts_text"] = explanation
    state["explanation_audio_path"] = None
    state["awaiting_next_after_wrong"] = True
    state["status"] = "Incorrect answer. Review explanation, then continue."
    feedback = (
        f"Incorrect.\n\nCorrect answer: {shown_answer}\n\nExplanation: {explanation}\n\n"
        "Click 'Play Explanation Audio' to generate speech for the explanation."
    )
    return reset_ui_from_state(state, feedback=feedback)
922
+
923
+
924
def next_question(state: Dict[str, Any]):
    """Advance past a wrongly answered question, or finish the quiz on the last one."""
    if not state.get("mcqs"):
        return reset_ui_from_state(state, feedback="Load a PDF first.")
    if state.get("completed"):
        return reset_ui_from_state(state, feedback="Quiz already completed.")
    if not state.get("awaiting_next_after_wrong"):
        return reset_ui_from_state(state, feedback="Use Submit Answer for the current question.")

    # Either way we leave the review state and drop the stale explanation audio.
    state["awaiting_next_after_wrong"] = False
    state["last_explanation_tts_text"] = ""
    state["explanation_audio_path"] = None

    last_index = len(state["mcqs"]) - 1
    if state["current_index"] >= last_index:
        state["completed"] = True
        state["status"] = "Quiz completed."
        return reset_ui_from_state(
            state,
            feedback=f"Quiz finished. Final score: {state['score']} / {len(state['mcqs'])}.",
        )

    state["current_index"] += 1
    state["status"] = "Next question loaded."
    return reset_ui_from_state(state, feedback="Moved to the next question.")
949
+
950
+
951
def restart_quiz(state: Dict[str, Any]):
    """Reset quiz progress back to question one while keeping the generated MCQs."""
    if not state.get("mcqs"):
        # Nothing to restart; hand back a pristine session instead.
        return reset_ui_from_state(new_session_state(), feedback="Load a PDF first.")
    state.update(
        current_index=0,
        score=0,
        awaiting_next_after_wrong=False,
        completed=False,
        last_explanation_tts_text="",
        explanation_audio_path=None,
        status="Quiz restarted.",
    )
    return reset_ui_from_state(state, feedback="Quiz restarted.")
962
+
963
+
964
def go_to_exam_page(state: Dict[str, Any]):
    """Switch to the exam page, clearing any previous quiz state.

    Generator handler so its yields flow through the same output wiring as
    the other page-transition handlers.
    """
    if not state.get("lecture_text"):
        state["status"] = "No lecture loaded."
        yield reset_ui_from_state(state, feedback="Generate lecture first.", results_visible=False, loading_visible=False)
        return

    # Fresh exam page: no character chosen yet, no questions, zeroed score.
    state.update(
        current_page="exam",
        exam_character_id=None,
        mcqs=[],
        current_index=0,
        score=0,
        awaiting_next_after_wrong=False,
        completed=False,
        last_explanation_tts_text="",
        explanation_audio_path=None,
        status="请选择角色以生成 MCQ",
    )
    yield reset_ui_from_state(
        state,
        feedback="",
        results_visible=True,
        loading_visible=False,
    )
986
+
987
+
988
def generate_exam_mcq(selected_character_id: Optional[str], state: Dict[str, Any]):
    """Generator handler: build MCQs for the exam page.

    Requires a previously generated lecture (so ``pdf_path``/``pdf_excerpt``
    are in ``state``) and a selected exam character. Yields a loading view
    first, then either the fresh quiz or an error view.
    """
    if not state.get("lecture_text"):
        state["status"] = "No lecture loaded."
        yield reset_ui_from_state(state, feedback="Generate lecture first.", results_visible=False, loading_visible=False)
        return
    if not selected_character_id:
        # Stay on the exam page and prompt for a character selection.
        state["status"] = "请选择角色以生成 MCQ"
        yield reset_ui_from_state(state, feedback="", results_visible=True, loading_visible=False)
        return

    state["current_page"] = "exam"
    state["exam_character_id"] = selected_character_id
    state["status"] = "正在生成 MCQ..."
    # Discard artifacts from a previous quiz run before regenerating.
    state["last_explanation_tts_text"] = ""
    state["explanation_audio_path"] = None
    state["mcqs"] = []
    # First yield: loading banner while the engine generates questions.
    yield reset_ui_from_state(
        state,
        feedback="",
        results_visible=False,
        loading_visible=True,
        loading_text="正在生成 MCQ,请稍候...",
    )

    try:
        pdf_path = state.get("pdf_path")
        pdf_excerpt = state.get("pdf_excerpt", "")
        if not pdf_path:
            raise RuntimeError("PDF path missing in session state.")
        mcqs = engine.build_mcqs(pdf_path, pdf_excerpt, get_character_config(selected_character_id))
        state["mcqs"] = mcqs
        state["current_index"] = 0
        state["score"] = 0
        state["awaiting_next_after_wrong"] = False
        state["completed"] = False
        state["current_page"] = "exam"
        state["status"] = "MCQ generated."
        yield reset_ui_from_state(
            state,
            feedback="",
            results_visible=True,
            loading_visible=False,
        )
    except Exception as exc:
        # Keep the user on the exam page and report the failure.
        state["current_page"] = "exam"
        state["status"] = "Failed during MCQ generation."
        yield reset_ui_from_state(
            state,
            feedback=f"Error: {type(exc).__name__}: {exc}",
            results_visible=True,
            loading_visible=False,
        )
1040
+
1041
+
1042
def on_generate_click(pdf_file: Optional[str], explain_character_id: str, exam_character_id: Optional[str], state: Dict[str, Any]):
    """Route the single Generate button to the action for the visible page."""
    if state.get("current_page", "explain") == "exam":
        # Exam page: (re)generate MCQs with the exam character.
        yield from generate_exam_mcq(exam_character_id, state)
    else:
        # Explain page: run the full PDF -> lecture pipeline.
        yield from process_pdf(pdf_file, explain_character_id, state)
1048
+
1049
+
1050
def go_to_explain_page(state: Dict[str, Any]):
    """Return from the exam page to the lecture/explain page."""
    state["current_page"] = "explain"
    message = state.get("status", "Explain page")
    return reset_ui_from_state(state, feedback=message)
1053
+
1054
+
1055
def on_character_change(character_id: str, state: Dict[str, Any]):
    """Handle switching the explain-page character.

    Drops audio artifacts tied to the previous character and hides the result
    pages; previously generated content stays in ``state`` (so the user can
    compare) until the next Generate click.
    """
    cfg = get_character_config(character_id)
    state["character_id"] = cfg["id"]
    state["current_page"] = "explain"
    # Old audio belongs to the previous character's content.
    state["lecture_audio_path"] = None
    state["explanation_audio_path"] = None
    state["last_explanation_tts_text"] = ""
    return (
        state,
        build_character_header_html(cfg["id"]),
        build_chat_avatar_html(cfg["id"]),
        build_chat_meta_html(cfg["id"]),
        gr.update(visible=False),  # explain_page
        gr.update(visible=False),  # exam_page
        gr.update(visible=False),  # loading_md
        "Character switched. Upload PDF and click Generate.",
    )
1073
+
1074
+
1075
def on_exam_character_select(character_id: Optional[str], state: Dict[str, Any]):
    """Record the exam-page character choice; MCQs are generated on Generate."""
    state["exam_character_id"] = character_id
    message = "Exam character selected. Click Generate to create MCQs."
    state["status"] = message
    return state, message
1079
+
1080
+
1081
def play_lecture_audio(state: Dict[str, Any]):
    """Synthesize TTS audio for the lecture text via the engine."""
    if not state.get("lecture_text"):
        state["status"] = "No lecture text available."
        return state, state["status"], state.get("lecture_audio_path"), "Generate lecture first."
    state["status"] = "Generating lecture audio..."
    try:
        audio_path = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture")
    except Exception as exc:
        state["status"] = "Lecture audio generation failed."
        return state, state["status"], state.get("lecture_audio_path"), f"TTS error: {type(exc).__name__}: {exc}"
    state["lecture_audio_path"] = audio_path
    state["status"] = "Lecture audio ready."
    return state, state["status"], audio_path, "Lecture audio generated."
1093
+
1094
+
1095
def play_explanation_audio(state: Dict[str, Any]):
    """Synthesize TTS audio for the last wrong-answer explanation."""
    text = state.get("last_explanation_tts_text", "")
    if not text:
        state["status"] = "No explanation available for TTS."
        return state, state["status"], state.get("explanation_audio_path"), "Answer a question incorrectly first."
    state["status"] = "Generating explanation audio..."
    try:
        audio_path = engine.synthesize_tts(text, name_prefix="explanation")
    except Exception as exc:
        state["status"] = "Explanation audio generation failed."
        return state, state["status"], state.get("explanation_audio_path"), f"TTS error: {type(exc).__name__}: {exc}"
    state["explanation_audio_path"] = audio_path
    state["status"] = "Explanation audio ready."
    return state, state["status"], audio_path, "Explanation audio generated."
1108
+
1109
+
1110
def build_css() -> str:
    """Build the global stylesheet injected into the Gradio Blocks app.

    Returns one CSS string: font imports, a dark translucent theme over the
    default Gradio components, the pill-style character selectors, the chat
    layout, and the fixed bottom composer bar. ``bg_css`` is a placeholder
    for an optional background rule; it is currently empty and appended at
    the end of the stylesheet. Literal braces are doubled (``{{``/``}}``)
    because this is an f-string.
    """
    bg_css = ""

    return f"""
    @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@0;1&family=Inter:wght@400;500;600;700&display=swap');

    html, body {{
        min-height: 100%;
        height: auto;
    }}
    body {{
        background-color: #ffffff !important;
        font-family: "Inter", sans-serif !important;
    }}
    .app, #root, .gradio-container, .gradio-container > .main {{
        background: transparent !important;
    }}
    .gradio-container {{
        position: relative;
        z-index: 1;
    }}
    .gradio-container .block,
    .gradio-container .panel,
    .gradio-container .gr-box,
    .gradio-container .gr-form,
    .gradio-container .gr-group {{
        background: rgba(14, 16, 24, 0.62) !important;
        backdrop-filter: blur(2px);
        border-color: rgba(255, 255, 255, 0.08) !important;
    }}
    .gradio-container textarea,
    .gradio-container input,
    .gradio-container .wrap,
    .gradio-container .svelte-1ipelgc {{
        background-color: transparent !important;
    }}
    .gradio-container textarea,
    .gradio-container input {{
        box-shadow: none !important;
        color: #eef1f6 !important;
    }}
    .gradio-container label,
    .gradio-container .prose,
    .gradio-container .prose p,
    .gradio-container .prose code,
    .gradio-container .prose strong {{
        color: #eef1f6 !important;
    }}
    #page-shell {{
        min-height: 100vh;
        padding: 2rem 1.2rem 9rem 1.2rem;
        max-width: 980px;
        margin: 0 auto;
    }}
    #page-shell .hero {{
        text-align: center;
        margin: 1.2rem 0 1.8rem 0;
    }}
    #page-shell .hero-title {{
        margin: 0;
        color: #f4f6fb;
        letter-spacing: 0.01em;
        font-family: "Instrument Serif", Georgia, serif;
        font-weight: 400;
        font-size: clamp(2.05rem, 3vw, 2.75rem);
        text-shadow: 0 1px 8px rgba(0,0,0,0.35);
    }}
    #page-shell .hero-sub {{
        margin: 0.65rem 0 0 0;
        color: rgba(241, 244, 251, 0.88);
        font-size: 0.98rem;
    }}
    #page-shell .hero-note {{
        margin-top: 0.5rem;
        color: rgba(241, 244, 251, 0.72);
        font-size: 0.92rem;
    }}
    #character-card {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
    }}
    .char-wrap {{
        display: flex;
        flex-direction: column;
        align-items: center;
        gap: 0.45rem;
        margin-bottom: 0.8rem;
    }}
    .char-avatar {{
        width: 84px;
        height: 84px;
        border-radius: 999px;
        object-fit: cover;
        border: 1px solid rgba(255,255,255,0.18);
        box-shadow: 0 8px 26px rgba(0,0,0,0.28);
    }}
    .char-name {{
        color: #f6f7fb;
        font-weight: 600;
        font-size: 1.05rem;
    }}
    .char-tag {{
        color: rgba(240,243,250,0.78);
        font-size: 0.95rem;
    }}
    .char-byline {{
        color: rgba(240,243,250,0.58);
        font-size: 0.85rem;
    }}
    #character-select-wrap {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        margin: -0.1rem auto 0.8rem auto !important;
        max-width: 220px !important;
        min-width: 0 !important;
        padding: 0 !important;
    }}
    #page-shell .flat-select,
    #page-shell .flat-select > div,
    #page-shell .flat-select .block,
    #page-shell .flat-select .gradio-dropdown {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        padding: 0 !important;
    }}
    #character-select-wrap,
    #character-select-wrap > div,
    #character-select-wrap > div > div,
    #character-select-wrap .wrap,
    #character-select-wrap input,
    #character-select-wrap button {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
    }}
    #character-select-wrap .wrap {{
        justify-content: center;
        padding: 0 !important;
        min-height: 20px !important;
    }}
    #character-select-wrap input,
    #character-select-wrap [role="combobox"],
    #character-select-wrap [role="combobox"] {{
        font-family: "Inter", sans-serif !important;
        font-size: 0.88rem !important;
        font-weight: 400 !important;
        color: rgba(240,243,250,0.78) !important;
        text-align: center !important;
    }}
    #character-select-wrap [role="combobox"] {{
        min-height: 20px !important;
        padding: 0 !important;
    }}
    #character-select-wrap [role="listbox"],
    [data-testid="dropdown-menu"] {{
        background: rgba(20, 22, 30, 0.96) !important;
        border: 1px solid rgba(255,255,255,0.12) !important;
        box-shadow: 0 12px 30px rgba(0,0,0,0.35) !important;
        z-index: 9999 !important;
    }}
    [data-testid="dropdown-menu"] * {{
        color: #eef1f6 !important;
    }}
    #character-select-wrap svg,
    #character-select-wrap [data-icon] {{
        opacity: 0.65 !important;
        color: rgba(240,243,250,0.78) !important;
    }}
    #character-select-wrap {{
        display: flex !important;
        justify-content: center !important;
    }}
    #character-select-wrap .wrap {{
        display: flex !important;
        gap: 0.35rem !important;
        flex-wrap: wrap !important;
        justify-content: center !important;
    }}
    #character-select-wrap label {{
        background: transparent !important;
        border: 1px solid rgba(255,255,255,0.14) !important;
        border-radius: 999px !important;
        padding: 0.18rem 0.65rem !important;
        min-height: 0 !important;
    }}
    #character-select-wrap label span {{
        color: rgba(240,243,250,0.78) !important;
        font-size: 0.88rem !important;
    }}
    #character-select-wrap input[type="radio"] {{
        display: none !important;
    }}
    #character-select-wrap label:has(input[type="radio"]:checked) {{
        background: rgba(255,255,255,0.10) !important;
        border-color: rgba(255,255,255,0.22) !important;
    }}
    #character-select-wrap label:has(input[type="radio"]:checked) span {{
        color: #ffffff !important;
    }}
    #gen-loading {{
        text-align: center;
        padding: 14px 18px;
        margin: 0 0 12px 0;
        color: #f2f3f8;
        background: rgba(255,255,255,0.08);
        border: 1px solid rgba(255,255,255,0.12);
        border-radius: 12px;
        backdrop-filter: blur(3px);
    }}
    #results-panel {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        padding: 0 !important;
        gap: 0.75rem;
    }}
    #chat-row {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        align-items: flex-start !important;
    }}
    #chat-avatar-col {{
        max-width: 54px !important;
        min-width: 54px !important;
    }}
    .mini-avatar {{
        width: 34px;
        height: 34px;
        border-radius: 999px;
        object-fit: cover;
        border: 1px solid rgba(255,255,255,0.16);
    }}
    #chat-main {{
        flex: 1;
    }}
    #chat-meta {{
        margin: 0 0 0.45rem 0;
        color: rgba(245,247,252,0.95);
        font-size: 0.95rem;
        font-weight: 600;
    }}
    #chat-meta .pill {{
        margin-left: 0.5rem;
        padding: 0.08rem 0.45rem;
        border-radius: 999px;
        background: rgba(255,255,255,0.1);
        color: rgba(255,255,255,0.78);
        font-size: 0.78rem;
    }}
    #lecture-wrap {{
        background: rgba(33, 36, 46, 0.82) !important;
        border: 1px solid rgba(255,255,255,0.06) !important;
        border-radius: 20px !important;
        padding: 0.35rem 0.45rem !important;
    }}
    #lecture-wrap textarea {{
        font-style: italic;
        line-height: 1.45 !important;
        color: rgba(244,246,251,0.95) !important;
    }}
    #lecture-actions {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        margin-top: 0.35rem !important;
    }}
    #exam-entry-wrap {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        margin-top: 0.25rem !important;
    }}
    #bottom-composer {{
        position: fixed;
        left: 50%;
        transform: translateX(-50%);
        bottom: 18px;
        width: min(860px, calc(100vw - 28px));
        z-index: 50;
        background: rgba(24, 26, 34, 0.88);
        border: 1px solid rgba(255,255,255,0.08);
        border-radius: 999px;
        box-shadow: 0 16px 40px rgba(0,0,0,0.22);
        backdrop-filter: blur(10px);
        padding: 8px 10px;
    }}
    #bottom-composer .wrap {{
        border: none !important;
    }}
    #bottom-composer .block {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
    }}
    #bottom-composer button {{
        border-radius: 999px !important;
    }}
    #generate-btn button {{
        min-height: 38px !important;
        height: 38px !important;
        padding: 0 18px !important;
        font-size: 0.9rem !important;
        line-height: 1 !important;
        min-width: 132px !important;
    }}
    #pdf-uploader {{
        min-height: 42px;
    }}
    #pdf-uploader .wrap {{
        min-height: 42px !important;
        padding: 4px 10px !important;
    }}
    #pdf-uploader [data-testid="file-upload-dropzone"] {{
        min-height: 42px !important;
        height: 42px !important;
        padding: 2px 8px !important;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
    }}
    #pdf-uploader [data-testid="file-upload-dropzone"] * {{
        font-size: 0.88rem !important;
    }}
    #status-wrap, #quiz-wrap, #tts-wrap, #explain-wrap {{
        background: rgba(18, 20, 28, 0.58) !important;
        border-radius: 16px !important;
    }}
    #exam-page {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        padding: 0 !important;
    }}
    #exam-nav {{
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        justify-content: space-between;
        align-items: center;
    }}
    .container {{max-width: 980px; margin: 0 auto;}}
    .mono {{font-family: ui-monospace, Menlo, Consolas, monospace;}}
    {bg_css}
    """
1458
+
1459
+
1460
+ CSS = build_css()
1461
+
1462
+
1463
+ def _image_data_url(path: Path) -> str:
1464
+ if not path.exists():
1465
+ return ""
1466
+ mime = "image/jpeg" if path.suffix.lower() in {".jpg", ".jpeg"} else "image/png"
1467
+ return f"data:{mime};base64," + base64.b64encode(path.read_bytes()).decode("ascii")
1468
+
1469
+
1470
def build_character_header_html(character_id: Optional[str] = None) -> str:
    """Render the hero header (avatar, name, tagline, byline) for a character."""
    cfg = get_character_config(character_id)
    avatar_path = cfg.get("avatar_path", "")
    avatar_url = _image_data_url(Path(avatar_path)) if avatar_path else ""
    avatar_img = f'<img class="char-avatar" src="{avatar_url}" alt="avatar" />' if avatar_url else ""
    return f"""
    <section class="hero">
      <div class="char-wrap">
        {avatar_img}
        <div class="char-name">{cfg.get("display_name", "PDF Paper Tutor")}</div>
        <div class="char-tag">{cfg.get("tagline", "")}</div>
        <div class="char-byline">{cfg.get("byline", "")}</div>
      </div>
    </section>
    """
1484
+
1485
+
1486
def build_chat_avatar_html(character_id: Optional[str] = None) -> str:
    """Render the small chat avatar image; "" when no avatar is configured/found."""
    cfg = get_character_config(character_id)
    avatar_path = cfg.get("avatar_path", "")
    avatar_url = _image_data_url(Path(avatar_path)) if avatar_path else ""
    if not avatar_url:
        return ""
    return f'<img class="mini-avatar" src="{avatar_url}" alt="avatar" />'
1490
+
1491
+
1492
def build_chat_meta_html(character_id: Optional[str] = None) -> str:
    """Render the chat header line (label plus mode pill) for a character."""
    cfg = get_character_config(character_id)
    label = cfg.get("chat_label", "PDF Paper Tutor")
    mode = cfg.get("chat_mode", "paper mode")
    return f"""
    <div id="chat-meta">{label} <span class="pill">{mode}</span></div>
    """
1497
+
1498
+
1499
# ---------------------------------------------------------------------------
# UI layout and event wiring. Two "pages" (explain / exam) are plain Columns
# whose visibility is toggled by reset_ui_from_state via the session state.
# ---------------------------------------------------------------------------
with gr.Blocks(css=CSS) as demo:
    with gr.Column(elem_id="page-shell"):
        # Hero header + pill-style character selector for the explain page.
        character_header_html = gr.HTML(build_character_header_html(DEFAULT_CHARACTER_ID), elem_id="character-card")
        character_dropdown = gr.Radio(
            choices=[(cfg["display_name"], cid) for cid, cfg in CHARACTER_CONFIGS.items()],
            value=DEFAULT_CHARACTER_ID,
            label="",
            show_label=False,
            interactive=True,
            elem_id="character-select-wrap",
            container=False,
        )

        # Per-browser-session state; never shared between users.
        state = gr.State(new_session_state())

        loading_md = gr.Markdown("正在生成中,请稍候...", elem_id="gen-loading", visible=False)

        # ----- Explain page: lecture text + lecture TTS -----
        with gr.Column(visible=False, elem_id="results-panel") as explain_page:
            with gr.Row(elem_id="chat-row"):
                with gr.Column(scale=0, elem_id="chat-avatar-col"):
                    chat_avatar_html = gr.HTML(build_chat_avatar_html(DEFAULT_CHARACTER_ID))
                with gr.Column(elem_id="chat-main"):
                    chat_meta_html = gr.HTML(build_chat_meta_html(DEFAULT_CHARACTER_ID))
                    with gr.Column(elem_id="lecture-wrap"):
                        lecture_box = gr.Textbox(
                            label="",
                            show_label=False,
                            lines=10,
                            interactive=False,
                            placeholder="Generated lecture explanation will appear here...",
                        )
                    with gr.Row(elem_id="lecture-actions"):
                        play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, scale=0)
                    with gr.Row(elem_id="exam-entry-wrap"):
                        exam_btn = gr.Button("Exam", interactive=False, variant="secondary", scale=0)

            with gr.Column(elem_id="tts-wrap"):
                lecture_audio = gr.Audio(label="Lecture TTS", type="filepath")

        # ----- Exam page: character choice, quiz, explanation TTS -----
        with gr.Column(visible=False, elem_id="exam-page") as exam_page:
            with gr.Row(elem_id="exam-nav"):
                back_btn = gr.Button("Back", variant="secondary", scale=0)
                # NOTE(review): reuses elem_id "character-select-wrap" so it
                # inherits the pill styling; this duplicates a DOM id — a
                # shared elem_class would be cleaner. Verify before changing.
                exam_character_radio = gr.Radio(
                    choices=[(cfg["display_name"], cid) for cid, cfg in CHARACTER_CONFIGS.items()],
                    value=None,
                    label="Choose character for MCQ",
                    interactive=True,
                    elem_id="character-select-wrap",
                    container=False,
                )
            with gr.Column(elem_id="status-wrap"):
                status_box = gr.Textbox(label="Status", value="Idle", interactive=False)
            with gr.Column(elem_id="quiz-wrap"):
                quiz_header = gr.Markdown("### No question loaded", visible=False)
                choice_radio = gr.Radio(choices=[], label="Select one answer", interactive=False)
                with gr.Row():
                    submit_btn = gr.Button("Submit Answer", interactive=False)
                    next_btn = gr.Button("Next Question", visible=False)
                    restart_btn = gr.Button("Restart Quiz", interactive=False)
                score_box = gr.Textbox(label="Score", value="Score: 0 / 0", interactive=False)
                feedback_box = gr.Textbox(label="Feedback / Explanation", lines=8, interactive=False)

            with gr.Column(elem_id="explain-wrap"):
                explanation_audio = gr.Audio(label="Explanation TTS (shown after wrong answer)", type="filepath")
                play_expl_btn = gr.Button("Play Explanation Audio", visible=False, interactive=False)

        # ----- Fixed bottom composer: PDF upload + the single Generate button -----
        with gr.Row(elem_id="bottom-composer"):
            pdf_input = gr.File(
                label="",
                show_label=False,
                file_types=[".pdf"],
                type="filepath",
                elem_id="pdf-uploader",
                scale=7,
                min_width=0,
            )
            run_btn = gr.Button("Generate", variant="primary", elem_id="generate-btn", scale=3, min_width=120)

        # Order here MUST match the tuple returned by reset_ui_from_state.
        outputs = [
            state,
            loading_md,
            explain_page,
            exam_page,
            exam_character_radio,
            status_box,
            lecture_box,
            lecture_audio,
            play_lecture_btn,
            exam_btn,
            quiz_header,
            choice_radio,
            score_box,
            feedback_box,
            explanation_audio,
            play_expl_btn,
            next_btn,
            submit_btn,
            restart_btn,
        ]

        # One Generate button serves both pages; on_generate_click routes it.
        run_btn.click(fn=on_generate_click, inputs=[pdf_input, character_dropdown, exam_character_radio, state], outputs=outputs)
        character_dropdown.change(
            fn=on_character_change,
            inputs=[character_dropdown, state],
            outputs=[state, character_header_html, chat_avatar_html, chat_meta_html, explain_page, exam_page, loading_md, status_box],
        )
        exam_btn.click(fn=go_to_exam_page, inputs=[state], outputs=outputs)
        exam_character_radio.change(fn=on_exam_character_select, inputs=[exam_character_radio, state], outputs=[state, status_box])
        back_btn.click(fn=go_to_explain_page, inputs=[state], outputs=outputs)
        submit_btn.click(fn=submit_answer, inputs=[choice_radio, state], outputs=outputs)
        next_btn.click(fn=next_question, inputs=[state], outputs=outputs)
        restart_btn.click(fn=restart_quiz, inputs=[state], outputs=outputs)
        play_lecture_btn.click(
            fn=play_lecture_audio,
            inputs=[state],
            outputs=[state, status_box, lecture_audio, feedback_box],
        )
        play_expl_btn.click(
            fn=play_explanation_audio,
            inputs=[state],
            outputs=[state, status_box, explanation_audio, feedback_box],
        )
1621
+
1622
+
1623
+ if __name__ == "__main__":
1624
+ demo.queue()
1625
+ demo.launch()
characters/Mcgonagall/lecture_prompt.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ 你是一名风格冷静、严谨但清晰的课程助教(Mcgonagall 风格),请阅读用户上传的论文内容,并输出一段中文讲解,要求:
2
+ 1. 先说明论文要解决的问题和背景;
3
+ 2. 再解释核心方法(按步骤/模块);
4
+ 3. 再总结实验结果或亮点;
5
+ 4. 最后给出局限性与适用场景;
6
+ 5. 语言精炼、逻辑清楚,适合课堂讲解(约 400-700 字);
7
+ 6. 不要虚构论文中不存在的实验细节。
8
+
9
+ 论文内容(可能是节选):
10
+ {document}
characters/Mcgonagall/mcq_prompt.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 请基于下面论文内容,生成 5 道中文单选题(MCQ),用于课堂测验。
2
+ 严格输出 JSON(不要 markdown 代码块),格式如下:
3
+ {
4
+ "questions": [
5
+ {
6
+ "question": "...",
7
+ "options": ["A选项", "B选项", "C选项", "D选项"],
8
+ "answer": "A",
9
+ "explanation": "..."
10
+ }
11
+ ]
12
+ }
13
+
14
+ 要求:
15
+ 1. 共 5 题;
16
+ 2. 每题 4 个选项;
17
+ 3. answer 必须是 A/B/C/D;
18
+ 4. 解析要说明为什么正确,以及常见误区;
19
+ 5. 题目覆盖背景、方法、结果、局限性;
20
+ 6. 题目难度适中,适合课程课堂测验。
21
+
22
+ 论文内容(可能是节选):
23
+ {document}
characters/Mcgonagall/mcq_retry_prompt.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 基于以下论文内容生成 5 道中文单选题。只输出合法 JSON,不要任何解释,不要 markdown。
2
+
3
+ 限制:
4
+ 1. 必须是紧凑 JSON(单行也可以);
5
+ 2. 共 5 题;
6
+ 3. 每题字段:question、options(4项)、answer(A/B/C/D)、explanation;
7
+ 4. explanation 保持简短(1-2句);
8
+ 5. 不要输出任何 JSON 以外内容。
9
+
10
+ 输出格式:
11
+ {"questions":[{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}]}
12
+
13
+ 论文内容:
14
+ {document}
characters/Mcgonagall/meta.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "Mcgonagall",
3
+ "display_name": "Mcgonagall",
4
+ "tagline": "Research paper explainer · MCQ coach",
5
+ "byline": "By @local-demo",
6
+ "chat_label": "Mcgonagall",
7
+ "chat_mode": "paper mode",
8
+ "avatar": "avatar.jpg",
9
+ "lecture_prompt_file": "lecture_prompt.txt",
10
+ "mcq_prompt_file": "mcq_prompt.txt",
11
+ "mcq_retry_prompt_file": "mcq_retry_prompt.txt"
12
+ }
characters/snape/lecture_prompt.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ 你是一名风格冷静、严谨但清晰的课程助教(Snape 风格),请阅读用户上传的论文内容,并输出一段中文讲解,要求:
2
+ 1. 先说明论文要解决的问题和背景;
3
+ 2. 再解释核心方法(按步骤/模块);
4
+ 3. 再总结实验结果或亮点;
5
+ 4. 最后给出局限性与适用场景;
6
+ 5. 语言精炼、逻辑清楚,适合课堂讲解(约 400-700 字);
7
+ 6. 不要虚构论文中不存在的实验细节。
8
+
9
+ 论文内容(可能是节选):
10
+ {document}
characters/snape/mcq_prompt.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 请基于下面论文内容,生成 5 道中文单选题(MCQ),用于课堂测验。
2
+ 严格输出 JSON(不要 markdown 代码块),格式如下:
3
+ {
4
+ "questions": [
5
+ {
6
+ "question": "...",
7
+ "options": ["A选项", "B选项", "C选项", "D选项"],
8
+ "answer": "A",
9
+ "explanation": "..."
10
+ }
11
+ ]
12
+ }
13
+
14
+ 要求:
15
+ 1. 共 5 题;
16
+ 2. 每题 4 个选项;
17
+ 3. answer 必须是 A/B/C/D;
18
+ 4. 解析要说明为什么正确,以及常见误区;
19
+ 5. 题目覆盖背景、方法、结果、局限性;
20
+ 6. 题目难度适中,适合课程课堂测验。
21
+
22
+ 论文内容(可能是节选):
23
+ {document}
characters/snape/mcq_retry_prompt.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 基于以下论文内容生成 5 道中文单选题。只输出合法 JSON,不要任何解释,不要 markdown。
2
+
3
+ 限制:
4
+ 1. 必须是紧凑 JSON(单行也可以);
5
+ 2. 共 5 题;
6
+ 3. 每题字段:question、options(4项)、answer(A/B/C/D)、explanation;
7
+ 4. explanation 保持简短(1-2句);
8
+ 5. 不要输出任何 JSON 以外内容。
9
+
10
+ 输出格式:
11
+ {"questions":[{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}]}
12
+
13
+ 论文内容:
14
+ {document}
characters/snape/meta.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "snape",
3
+ "display_name": "Snape",
4
+ "tagline": "Research paper explainer · MCQ coach",
5
+ "byline": "By @local-demo",
6
+ "chat_label": "Snape",
7
+ "chat_mode": "paper mode",
8
+ "avatar": "avatar.jpg",
9
+ "lecture_prompt_file": "lecture_prompt.txt",
10
+ "mcq_prompt_file": "mcq_prompt.txt",
11
+ "mcq_retry_prompt_file": "mcq_retry_prompt.txt"
12
+ }
characters/test/lecture_prompt.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ 你是一名风格冷静、严谨但清晰的课程助教(Snape 风格),请阅读用户上传的论文内容,并输出一段中文讲解,要求:
2
+ 1. 先说明论文要解决的问题和背景;
3
+ 2. 再解释核心方法(按步骤/模块);
4
+ 3. 再总结实验结果或亮点;
5
+ 4. 最后给出局限性与适用场景;
6
+ 5. 语言精炼、逻辑清楚,适合课堂讲解(约 400-700 字);
7
+ 6. 不要虚构论文中不存在的实验细节。
8
+
9
+ 论文内容(可能是节选):
10
+ {document}
characters/test/mcq_prompt.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 请基于下面论文内容,生成 5 道中文单选题(MCQ),用于课堂测验。
2
+ 严格输出 JSON(不要 markdown 代码块),格式如下:
3
+ {
4
+ "questions": [
5
+ {
6
+ "question": "...",
7
+ "options": ["A选项", "B选项", "C选项", "D选项"],
8
+ "answer": "A",
9
+ "explanation": "..."
10
+ }
11
+ ]
12
+ }
13
+
14
+ 要求:
15
+ 1. 共 5 题;
16
+ 2. 每题 4 个选项;
17
+ 3. answer 必须是 A/B/C/D;
18
+ 4. 解析要说明为什么正确,以及常见误区;
19
+ 5. 题目覆盖背景、方法、结果、局限性;
20
+ 6. 题目难度适中,适合课程课堂测验。
21
+
22
+ 论文内容(可能是节选):
23
+ {document}
characters/test/mcq_retry_prompt.txt ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 基于以下论文内容生成 5 道中文单选题。只输出合法 JSON,不要任何解释,不要 markdown。
2
+
3
+ 限制:
4
+ 1. 必须是紧凑 JSON(单行也可以);
5
+ 2. 共 5 题;
6
+ 3. 每题字段:question、options(4项)、answer(A/B/C/D)、explanation;
7
+ 4. explanation 保持简短(1-2句);
8
+ 5. 不要输出任何 JSON 以外内容。
9
+
10
+ 输出格式:
11
+ {"questions":[{"question":"...","options":["...","...","...","..."],"answer":"A","explanation":"..."}]}
12
+
13
+ 论文内容:
14
+ {document}
characters/test/meta.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "id": "test",
3
+ "display_name": "Test",
4
+ "tagline": "Research paper explainer · MCQ coach",
5
+ "byline": "By @local-demo",
6
+ "chat_label": "Test",
7
+ "chat_mode": "paper mode",
8
+ "avatar": "avatar.jpg",
9
+ "lecture_prompt_file": "lecture_prompt.txt",
10
+ "mcq_prompt_file": "mcq_prompt.txt",
11
+ "mcq_retry_prompt_file": "mcq_retry_prompt.txt"
12
+ }
notebook_prototype.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Notebook-friendly prototype helpers for the coursework workflow:
3
+ PDF -> lecture text -> 5 MCQs (with answers + explanations)
4
+
5
+ Usage in Colab/Jupyter:
6
+ from notebook_prototype import run_prototype
7
+ result = run_prototype("/path/to/paper.pdf", mock=True)
8
+ print(result["lecture_text"])
9
+ print(result["mcqs"][0])
10
+
11
+ Set mock=False to reuse the real Qwen3-VL backend from app.py (same prompts/parsing flow).
12
+ """
13
+
14
+ import json
15
+ import re
16
+ from pathlib import Path
17
+ from typing import Any, Dict, List
18
+
19
+ try:
20
+ from pypdf import PdfReader
21
+ except Exception:
22
+ PdfReader = None # type: ignore
23
+
24
+
25
# Prompt asking the model for a Chinese lecture-style explanation of the paper.
LECTURE_PROMPT = """
你是一名课程助教。请阅读论文内容并写一段中文讲解(400-700字),包括:
问题背景、核心方法、实验亮点、局限性与适用场景。

论文内容:
{document}
""".strip()


# Prompt asking the model for 5 MCQs as strict JSON.
# The JSON braces are doubled ({{ }}) so str.format() treats them as literals;
# the original template used bare braces — which made .format(document=...)
# raise ValueError/KeyError — and it never included the paper text at all.
MCQ_PROMPT = """
请基于论文内容生成 5 道中文单选题,并严格输出 JSON:
{{
  "questions": [
    {{
      "question": "...",
      "options": ["...", "...", "...", "..."],
      "answer": "A",
      "explanation": "..."
    }}
  ]
}}

论文内容:
{document}
""".strip()
47
+
48
+
49
def extract_pdf_text(pdf_path: str, max_chars: int = 16000) -> str:
    """Extract plain text from a PDF, capped at roughly ``max_chars`` characters.

    Each non-empty page is prefixed with a ``[Page N]`` header; extraction
    stops as soon as the accumulated length reaches ``max_chars`` and the
    joined result is truncated to exactly that limit.

    Args:
        pdf_path: Path to the PDF file.
        max_chars: Upper bound on the returned text length.

    Returns:
        The extracted text, or a human-readable fallback message when no
        text layer is present (e.g. a scanned PDF).

    Raises:
        RuntimeError: If pypdf is not installed.
    """
    if PdfReader is None:
        raise RuntimeError("pypdf is not installed.")
    reader = PdfReader(pdf_path)
    chunks: List[str] = []
    total = 0
    for i, page in enumerate(reader.pages, start=1):
        text = (page.extract_text() or "").strip()
        if not text:
            # Skip image-only / empty pages.
            continue
        part = f"[Page {i}]\n{text}\n"
        chunks.append(part)
        total += len(part)
        if total >= max_chars:
            break
    if not chunks:
        return "No extractable text found. For scanned PDFs, convert pages to images and feed them to a VL model."
    return "\n".join(chunks)[:max_chars]
67
+
68
+
69
+ def _mock_lecture(document: str) -> str:
70
+ short = re.sub(r"\\s+", " ", document)[:1000]
71
+ return (
72
+ "【Mock讲解】这篇论文主要围绕一个机器学习/生成式AI任务展开,目标是改善现有方法在效果、效率或稳定性上的不足。"
73
+ "作者通过提出新的模型结构、训练策略或推理流程来解决该问题,并通过实验与基线比较验证方法有效性。"
74
+ "在阅读时建议重点关注:任务定义、输入输出、方法模块、实验设置、指标、消融实验,以及论文提到的局限性。\\n\\n"
75
+ f"论文节选:{short}"
76
+ )
77
+
78
+
79
+ def _mock_mcqs() -> List[Dict[str, Any]]:
80
+ return [
81
+ {
82
+ "question": "论文讲解中首先应说明什么?",
83
+ "options": ["问题背景与任务目标", "部署服务器价格", "前端样式颜色", "Git分支命名"],
84
+ "answer": "A",
85
+ "explanation": "先解释背景与目标,听众才知道作者为什么提出该方法。",
86
+ },
87
+ {
88
+ "question": "哪一项更适合用于解释论文方法?",
89
+ "options": ["按模块/步骤描述输入到输出流程", "只贴公式不解释", "只读摘要", "只看结论"],
90
+ "answer": "A",
91
+ "explanation": "方法讲解应结构化呈现,否则难以理解论文贡献点。",
92
+ },
93
+ {
94
+ "question": "为什么要生成带解析的选择题?",
95
+ "options": ["支持交互式教学反馈", "为了减少推理时间", "为了替代PDF上传", "为了训练TTS模型"],
96
+ "answer": "A",
97
+ "explanation": "解析能帮助学生理解错误原因并形成学习闭环。",
98
+ },
99
+ {
100
+ "question": "长论文处理通常更稳妥的做法是?",
101
+ "options": ["分块阅读后汇总", "一次性全部输入且不做控制", "只看标题", "随机抽样一页"],
102
+ "answer": "A",
103
+ "explanation": "分块可以降低上下文长度风险并提高稳定性。",
104
+ },
105
+ {
106
+ "question": "在你的课程Demo里,TTS最主要用于?",
107
+ "options": ["讲解和错题解析语音输出", "替代VL模型阅读PDF", "生成图片", "训练新LoRA"],
108
+ "answer": "A",
109
+ "explanation": "TTS负责文本转语音,增强演示交互体验。",
110
+ },
111
+ ]
112
+
113
+
114
+ def run_prototype(pdf_path: str, mock: bool = True) -> Dict[str, Any]:
115
+ pdf_path = str(Path(pdf_path))
116
+ document = extract_pdf_text(pdf_path)
117
+
118
+ lecture_prompt = LECTURE_PROMPT.format(document=document)
119
+ mcq_prompt = MCQ_PROMPT.format(document=document)
120
+
121
+ if mock:
122
+ lecture_text = _mock_lecture(document)
123
+ mcqs = _mock_mcqs()
124
+ else:
125
+ # Reuse the same backend implementation as app.py to keep notebook/app behavior aligned.
126
+ from app import QwenPipelineEngine, parse_mcq_json # local import avoids gradio setup cost until needed
127
+
128
+ engine = QwenPipelineEngine()
129
+ engine.mock_mode = False
130
+ engine.ensure_vl_loaded()
131
+ lecture_text = engine._real_generate_text_from_pdf(pdf_path, lecture_prompt)
132
+ raw_mcq_json = engine._real_generate_text_from_pdf(pdf_path, mcq_prompt)
133
+ mcqs = [q.__dict__ for q in parse_mcq_json(raw_mcq_json)]
134
+
135
+ return {
136
+ "pdf_path": pdf_path,
137
+ "document_excerpt": document[:2000],
138
+ "lecture_prompt": lecture_prompt,
139
+ "mcq_prompt": mcq_prompt,
140
+ "lecture_text": lecture_text,
141
+ "mcqs": mcqs,
142
+ }
143
+
144
+
145
def pretty_print_mcqs(mcqs: List[Dict[str, Any]]) -> None:
    """Print MCQs in a readable quiz layout (question, A-D options, answer, explanation).

    Args:
        mcqs: MCQ dicts with "question", "options" (4 items), "answer",
            and "explanation" keys, as produced by run_prototype.
    """
    for i, q in enumerate(mcqs, start=1):
        print(f"\nQ{i}. {q['question']}")
        # zip pairs each option with its A-D label.
        for label, opt in zip(["A", "B", "C", "D"], q["options"]):
            print(f"  {label}. {opt}")
        print(f"Answer: {q['answer']}")
        print(f"Explanation: {q['explanation']}")
152
+
153
+
154
if __name__ == "__main__":
    # Minimal local check (replace with a real PDF path).
    usage_hint = "Import this file in a notebook and call run_prototype('/path/to/file.pdf')."
    print(usage_hint)
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ spaces
3
+ requests
4
+ pypdf
5
+ pypdfium2
6
+ Pillow