llexieguo commited on
Commit ·
ed25084
1
Parent(s): f718c5e
updated audio
Browse files- README.md +40 -1
- app.py +160 -13
- requirements.txt +1 -0
README.md
CHANGED
|
@@ -61,6 +61,46 @@ API_KEY="你的Key"
|
|
| 61 |
- 讲解/MCQ 使用 OpenAI-compatible `/chat/completions`
|
| 62 |
- TTS 优先尝试 `/audio/speech`,失败后回退 DashScope TTS 接口
|
| 63 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
## 角色目录结构(自动发现)
|
| 65 |
|
| 66 |
下拉/角色按钮会自动读取 `characters/` 下的所有子目录。
|
|
@@ -112,4 +152,3 @@ pip install pypdf
|
|
| 112 |
### 3. MCQ 生成失败 / JSON 错误
|
| 113 |
|
| 114 |
模型可能返回不完整 JSON,代码里已做重试与解析兜底;如果仍失败可重试一次或更换角色 prompt。
|
| 115 |
-
|
|
|
|
| 61 |
- 讲解/MCQ 使用 OpenAI-compatible `/chat/completions`
|
| 62 |
- TTS 优先尝试 `/audio/speech`,失败后回退 DashScope TTS 接口
|
| 63 |
|
| 64 |
+
## 使用同 Organization 的 HF Audio Space 做 TTS
|
| 65 |
+
|
| 66 |
+
你的 `audio` 项目已经暴露 API:`/tts_chunk(text, voice, language)`。
|
| 67 |
+
|
| 68 |
+
### 1. 部署 `/Users/lexi/workplace/audio` 到 HF Space
|
| 69 |
+
|
| 70 |
+
先在 Hugging Face 里创建组织下的 Space(例如 `your-org/audio`),然后推送代码:
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
cd /Users/lexi/workplace/audio
|
| 74 |
+
git remote add hf https://huggingface.co/spaces/your-org/audio
|
| 75 |
+
git push hf main
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
如果该 Space 是私有仓库,请在 HF 里创建一个可访问该组织 Space 的 token(read 权限即可调用)。
|
| 79 |
+
|
| 80 |
+
### 2. 在 `/Users/lexi/workplace/genai/.env` 配置调用
|
| 81 |
+
|
| 82 |
+
```env
|
| 83 |
+
# 讲解/MCQ 仍走 DashScope/OpenAI-compatible
|
| 84 |
+
API_URL="https://dashscope.aliyuncs.com/compatible-mode/v1"
|
| 85 |
+
API_KEY="你的Key"
|
| 86 |
+
USE_MOCK_MODELS=0
|
| 87 |
+
|
| 88 |
+
# TTS 走 HF Space(优先)
|
| 89 |
+
HF_TTS_SPACE_ID="your-org/audio"
|
| 90 |
+
# 如果是私有 Space,填 token;公开 Space 可不填
|
| 91 |
+
HF_TOKEN="hf_xxx"
|
| 92 |
+
HF_TTS_API_NAME="/tts_chunk"
|
| 93 |
+
HF_TTS_VOICE="male" # male 或 female
|
| 94 |
+
HF_TTS_LANGUAGE="Chinese"
|
| 95 |
+
# 1=HF失败时回退到原有TTS;0=只用HF,失败就报错
|
| 96 |
+
HF_TTS_ALLOW_FALLBACK=1
|
| 97 |
+
```
|
| 98 |
+
|
| 99 |
+
可选:
|
| 100 |
+
|
| 101 |
+
- 如果你更希望用完整 URL,可以改为 `HF_TTS_SPACE_URL="https://your-org-audio.hf.space"`。
|
| 102 |
+
- 如果不想回退到原 TTS 接口,设置 `HF_TTS_ALLOW_FALLBACK=0`。
|
| 103 |
+
|
| 104 |
## 角色目录结构(自动发现)
|
| 105 |
|
| 106 |
下拉/角色按钮会自动读取 `characters/` 下的所有子目录。
|
|
|
|
| 152 |
### 3. MCQ 生成失败 / JSON 错误
|
| 153 |
|
| 154 |
模型可能返回不完整 JSON,代码里已做重试与解析兜底;如果仍失败可重试一次或更换角色 prompt。
|
|
|
app.py
CHANGED
|
@@ -14,6 +14,11 @@ from typing import Any, Dict, List, Optional
|
|
| 14 |
import gradio as gr
|
| 15 |
import requests
|
| 16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
try:
|
| 18 |
import spaces # type: ignore
|
| 19 |
except Exception:
|
|
@@ -62,6 +67,18 @@ CHAT_MODEL_ID = os.getenv("QWEN_VL_MODEL_ID", "qwen-vl-max")
|
|
| 62 |
TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
|
| 63 |
TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
|
| 64 |
TTS_FORMAT = os.getenv("QWEN_TTS_FORMAT", "wav")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
API_TIMEOUT_SEC = int(os.getenv("API_TIMEOUT_SEC", "180"))
|
| 66 |
QWEN_VL_MAX_PAGES = int(os.getenv("QWEN_VL_MAX_PAGES", "4"))
|
| 67 |
QWEN_VL_RENDER_SCALE = float(os.getenv("QWEN_VL_RENDER_SCALE", "1.5"))
|
|
@@ -388,6 +405,54 @@ def _save_binary_audio(audio_bytes: bytes, out_path: str) -> str:
|
|
| 388 |
return out_path
|
| 389 |
|
| 390 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 391 |
def split_text_for_tts(text: str, max_len: int = 480) -> List[str]:
|
| 392 |
cleaned = re.sub(r"\s+", " ", (text or "")).strip()
|
| 393 |
if not cleaned:
|
|
@@ -456,7 +521,7 @@ class QwenPipelineEngine:
|
|
| 456 |
This ships with a mock mode by default so the workflow is runnable immediately.
|
| 457 |
When USE_MOCK_MODELS=0, it calls remote APIs:
|
| 458 |
- VL: OpenAI-compatible /chat/completions (works with DashScope compatible-mode and vLLM-style APIs)
|
| 459 |
-
- TTS:
|
| 460 |
"""
|
| 461 |
|
| 462 |
def __init__(self) -> None:
|
|
@@ -464,6 +529,7 @@ class QwenPipelineEngine:
|
|
| 464 |
self.vl_loaded = False
|
| 465 |
self.tts_loaded = False
|
| 466 |
self._pdf_page_cache: Dict[str, List[str]] = {}
|
|
|
|
| 467 |
|
| 468 |
def ensure_vl_loaded(self) -> None:
|
| 469 |
if self.vl_loaded:
|
|
@@ -479,6 +545,10 @@ class QwenPipelineEngine:
|
|
| 479 |
def ensure_tts_loaded(self) -> None:
|
| 480 |
if self.tts_loaded:
|
| 481 |
return
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
if self.mock_mode:
|
| 483 |
self.tts_loaded = True
|
| 484 |
return
|
|
@@ -487,16 +557,76 @@ class QwenPipelineEngine:
|
|
| 487 |
raise RuntimeError("Missing API_KEY for TTS API calls.")
|
| 488 |
self.tts_loaded = True
|
| 489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
def _mock_generate_lecture(self, pdf_excerpt: str) -> str:
|
| 491 |
excerpt = re.sub(r"\s+", " ", pdf_excerpt).strip()
|
| 492 |
excerpt = excerpt[:1000]
|
| 493 |
return (
|
| 494 |
-
"
|
| 495 |
-
"1. 论文问题与背景:该工作试图解决一个具体任务中的效率/性能/泛化问题,核心动机通常是现有方法在成本、准确性或可解释性方面存在不足。\n"
|
| 496 |
-
"2. 核心方法:作者提出新的模型结构、训练策略或推理流程,并通过若干模块协同完成任务。\n"
|
| 497 |
-
"3. 实验与结果:论文通常会在标准数据集上与基线比较,并报告性能提升、效率改善或更稳定的表现。\n"
|
| 498 |
-
"4. 局限与适用场景:方法可能依赖特定数据分布、计算资源或任务设定,迁移到新领域需要额外验证。\n\n"
|
| 499 |
-
f"论文节选(用于生成讲解): {excerpt}"
|
| 500 |
)
|
| 501 |
|
| 502 |
def _mock_generate_mcqs(self, lecture_text: str) -> List[MCQItem]:
|
|
@@ -609,6 +739,16 @@ class QwenPipelineEngine:
|
|
| 609 |
def _real_tts_single(self, text: str, out_path: str) -> str:
|
| 610 |
if not text.strip():
|
| 611 |
return write_tone_wav("empty", out_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 612 |
openai_url = f"{_require_api_url()}/audio/speech"
|
| 613 |
openai_payload = {
|
| 614 |
"model": TTS_MODEL_ID,
|
|
@@ -787,7 +927,7 @@ class QwenPipelineEngine:
|
|
| 787 |
def synthesize_tts(self, text: str, name_prefix: str = "audio") -> str:
|
| 788 |
self.ensure_tts_loaded()
|
| 789 |
out_path = str(TMP_DIR / f"{name_prefix}_{uuid.uuid4().hex}.wav")
|
| 790 |
-
if self.mock_mode:
|
| 791 |
return write_tone_wav(text, out_path)
|
| 792 |
return self._real_tts(text, out_path)
|
| 793 |
|
|
@@ -1576,14 +1716,20 @@ def play_lecture_audio(state: Dict[str, Any]):
|
|
| 1576 |
if not state.get("lecture_text"):
|
| 1577 |
state["status"] = "No lecture text available."
|
| 1578 |
return state, state["status"], state.get("lecture_audio_path"), "Generate lecture first."
|
|
|
|
| 1579 |
try:
|
| 1580 |
-
state["status"] = "Generating lecture audio..."
|
| 1581 |
state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture")
|
| 1582 |
state["status"] = "Lecture audio ready."
|
| 1583 |
-
return state, state["status"], state["lecture_audio_path"], "Lecture audio generated."
|
| 1584 |
except Exception as exc:
|
| 1585 |
state["status"] = "Lecture audio generation failed."
|
| 1586 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1587 |
|
| 1588 |
|
| 1589 |
def play_explanation_audio(state: Dict[str, Any]):
|
|
@@ -2454,6 +2600,7 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 2454 |
)
|
| 2455 |
with gr.Row(elem_id="lecture-actions"):
|
| 2456 |
play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, scale=0)
|
|
|
|
| 2457 |
with gr.Row(elem_id="exam-entry-wrap"):
|
| 2458 |
exam_btn = gr.Button("Go to Exam", interactive=False, variant="secondary", scale=0)
|
| 2459 |
|
|
@@ -2591,8 +2738,8 @@ with gr.Blocks(css=CSS) as demo:
|
|
| 2591 |
play_lecture_btn.click(
|
| 2592 |
fn=play_lecture_audio,
|
| 2593 |
inputs=[state],
|
| 2594 |
-
outputs=[state, status_box, lecture_audio,
|
| 2595 |
-
show_progress="
|
| 2596 |
)
|
| 2597 |
|
| 2598 |
|
|
|
|
| 14 |
import gradio as gr
|
| 15 |
import requests
|
| 16 |
|
| 17 |
+
try:
|
| 18 |
+
from gradio_client import Client as HFSpaceClient
|
| 19 |
+
except Exception: # pragma: no cover
|
| 20 |
+
HFSpaceClient = None # type: ignore
|
| 21 |
+
|
| 22 |
try:
|
| 23 |
import spaces # type: ignore
|
| 24 |
except Exception:
|
|
|
|
| 67 |
TTS_MODEL_ID = os.getenv("QWEN_TTS_MODEL_ID", "qwen-tts")
|
| 68 |
TTS_SPEAKER = os.getenv("QWEN_TTS_SPEAKER", "longxiaochun_v2")
|
| 69 |
TTS_FORMAT = os.getenv("QWEN_TTS_FORMAT", "wav")
|
| 70 |
+
# --- Optional Hugging Face Space TTS backend, configured via environment ---
# Setting either the Space id ("org/name") or the full Space URL enables it.
HF_TTS_SPACE_ID = os.getenv("HF_TTS_SPACE_ID", "").strip()
HF_TTS_SPACE_URL = os.getenv("HF_TTS_SPACE_URL", "").strip()
# The endpoint name is normalised to exactly one leading slash. A blank or
# slash-only value falls back to the default endpoint instead of becoming "/".
_hf_tts_api_name_raw = (os.getenv("HF_TTS_API_NAME", "/tts_chunk") or "").strip()
_hf_tts_api_name = _hf_tts_api_name_raw.lstrip("/")
HF_TTS_API_NAME = f"/{_hf_tts_api_name}" if _hf_tts_api_name else "/tts_chunk"
# An empty value (e.g. `HF_TTS_VOICE=` in .env) falls back to the default
# rather than sending an empty argument to the Space.
HF_TTS_VOICE = os.getenv("HF_TTS_VOICE", "").strip() or "male"
HF_TTS_LANGUAGE = os.getenv("HF_TTS_LANGUAGE", "").strip() or "Chinese"
# "1" keeps its original meaning; common truthy spellings and surrounding
# whitespace are accepted too. Anything else (including "0" and "") is False.
HF_TTS_ALLOW_FALLBACK = os.getenv("HF_TTS_ALLOW_FALLBACK", "1").strip().lower() in {
    "1",
    "true",
    "yes",
    "on",
}
# First non-empty token wins; an empty string means "no token configured".
HF_TOKEN = (
    os.getenv("HF_TOKEN")
    or os.getenv("HUGGINGFACEHUB_API_TOKEN")
    or os.getenv("HF_API_TOKEN", "")
).strip()
|
| 82 |
API_TIMEOUT_SEC = int(os.getenv("API_TIMEOUT_SEC", "180"))
|
| 83 |
QWEN_VL_MAX_PAGES = int(os.getenv("QWEN_VL_MAX_PAGES", "4"))
|
| 84 |
QWEN_VL_RENDER_SCALE = float(os.getenv("QWEN_VL_RENDER_SCALE", "1.5"))
|
|
|
|
| 405 |
return out_path
|
| 406 |
|
| 407 |
|
| 408 |
+
def _is_hf_tts_enabled() -> bool:
    """Report whether an HF Space TTS target (id or URL) is configured."""
    return any((HF_TTS_SPACE_ID, HF_TTS_SPACE_URL))
|
| 410 |
+
|
| 411 |
+
|
| 412 |
+
def _tts_backend_name() -> str:
    """Return a short label for the TTS backend implied by the configuration.

    The HF Space takes precedence; otherwise the label reflects whether mock
    models are in use.
    """
    if not _is_hf_tts_enabled():
        return "mock_tts" if USE_MOCK_MODELS else "api_tts"
    target = HF_TTS_SPACE_ID or HF_TTS_SPACE_URL
    return f"hf_space:{target}"
|
| 418 |
+
|
| 419 |
+
|
| 420 |
+
def _extract_audio_source(result: Any) -> str:
|
| 421 |
+
if isinstance(result, str):
|
| 422 |
+
return result
|
| 423 |
+
if isinstance(result, dict):
|
| 424 |
+
for key in ("path", "name", "url"):
|
| 425 |
+
value = result.get(key)
|
| 426 |
+
if isinstance(value, str) and value.strip():
|
| 427 |
+
return value
|
| 428 |
+
nested = result.get("audio")
|
| 429 |
+
if nested is not None:
|
| 430 |
+
return _extract_audio_source(nested)
|
| 431 |
+
if isinstance(result, (list, tuple)):
|
| 432 |
+
for item in result:
|
| 433 |
+
try:
|
| 434 |
+
return _extract_audio_source(item)
|
| 435 |
+
except RuntimeError:
|
| 436 |
+
continue
|
| 437 |
+
raise RuntimeError(f"Unsupported HF Space audio output: {result!r}")
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
def _read_audio_bytes_from_source(source: str) -> bytes:
|
| 441 |
+
source = (source or "").strip()
|
| 442 |
+
if not source:
|
| 443 |
+
raise RuntimeError("HF Space returned an empty audio source.")
|
| 444 |
+
if source.startswith("http://") or source.startswith("https://"):
|
| 445 |
+
resp = requests.get(source, timeout=API_TIMEOUT_SEC)
|
| 446 |
+
if resp.status_code >= 400:
|
| 447 |
+
raise RuntimeError(f"Failed to fetch HF Space audio URL {resp.status_code}: {resp.text[:500]}")
|
| 448 |
+
return resp.content
|
| 449 |
+
|
| 450 |
+
path = Path(source)
|
| 451 |
+
if path.exists():
|
| 452 |
+
return path.read_bytes()
|
| 453 |
+
raise RuntimeError(f"HF Space audio path does not exist: {source}")
|
| 454 |
+
|
| 455 |
+
|
| 456 |
def split_text_for_tts(text: str, max_len: int = 480) -> List[str]:
|
| 457 |
cleaned = re.sub(r"\s+", " ", (text or "")).strip()
|
| 458 |
if not cleaned:
|
|
|
|
| 521 |
This ships with a mock mode by default so the workflow is runnable immediately.
|
| 522 |
When USE_MOCK_MODELS=0, it calls remote APIs:
|
| 523 |
- VL: OpenAI-compatible /chat/completions (works with DashScope compatible-mode and vLLM-style APIs)
|
| 524 |
+
- TTS: HF Space /tts_chunk (optional) or DashScope/OpenAI-compatible endpoints
|
| 525 |
"""
|
| 526 |
|
| 527 |
def __init__(self) -> None:
|
|
|
|
| 529 |
self.vl_loaded = False
|
| 530 |
self.tts_loaded = False
|
| 531 |
self._pdf_page_cache: Dict[str, List[str]] = {}
|
| 532 |
+
self._hf_tts_client: Any = None
|
| 533 |
|
| 534 |
def ensure_vl_loaded(self) -> None:
|
| 535 |
if self.vl_loaded:
|
|
|
|
| 545 |
def ensure_tts_loaded(self) -> None:
|
| 546 |
if self.tts_loaded:
|
| 547 |
return
|
| 548 |
+
if _is_hf_tts_enabled():
|
| 549 |
+
self._ensure_hf_tts_client()
|
| 550 |
+
self.tts_loaded = True
|
| 551 |
+
return
|
| 552 |
if self.mock_mode:
|
| 553 |
self.tts_loaded = True
|
| 554 |
return
|
|
|
|
| 557 |
raise RuntimeError("Missing API_KEY for TTS API calls.")
|
| 558 |
self.tts_loaded = True
|
| 559 |
|
| 560 |
+
def _ensure_hf_tts_client(self) -> Any:
    """Return the HF Space client, creating and caching it on first use.

    Raises:
        RuntimeError: if gradio_client could not be imported, or if neither
            HF_TTS_SPACE_URL nor HF_TTS_SPACE_ID is configured.
    """
    if HFSpaceClient is None:
        raise RuntimeError("Missing gradio_client. Please install with: pip install gradio_client")

    cached = self._hf_tts_client
    if cached is not None:
        return cached

    target = HF_TTS_SPACE_URL or HF_TTS_SPACE_ID
    if not target:
        raise RuntimeError("Missing HF_TTS_SPACE_ID or HF_TTS_SPACE_URL.")

    auth = (HF_TOKEN or "").strip()
    if not auth:
        self._hf_tts_client = HFSpaceClient(target)
        return self._hf_tts_client

    # The constructor's token keyword differs across gradio_client versions:
    # try each known spelling in turn, then fall back to a raw header.
    for keyword in ("hf_token", "token"):
        try:
            self._hf_tts_client = HFSpaceClient(target, **{keyword: auth})
        except TypeError:
            continue
        return self._hf_tts_client

    self._hf_tts_client = HFSpaceClient(target, headers={"Authorization": f"Bearer {auth}"})
    return self._hf_tts_client
|
| 581 |
+
|
| 582 |
+
def _hf_space_tts_single(self, text: str, out_path: str) -> str:
    """Synthesize ``text`` through the HF Space and save the audio to ``out_path``.

    The configured endpoint name is tried first (with and without a leading
    slash), followed by the built-in fallback names. Only "unknown api_name"
    errors advance to the next candidate; any other error propagates.

    Raises:
        RuntimeError: if none of the candidate endpoint names exists.
    """
    client = self._ensure_hf_tts_client()

    configured = (HF_TTS_API_NAME or "").strip()
    bare = configured.lstrip("/")
    raw_names = (
        configured,
        f"/{bare}" if bare else "",
        bare,
        "/tts_chunk",
        "tts_chunk",
        "/predict",
        "predict",
    )
    # De-duplicate while keeping priority order.
    endpoints: List[str] = []
    for name in raw_names:
        name = name.strip()
        if name and name not in endpoints:
            endpoints.append(name)

    result: Any = None
    lookup_error: Optional[Exception] = None
    for endpoint in endpoints:
        try:
            result = client.predict(
                text,
                HF_TTS_VOICE,
                HF_TTS_LANGUAGE,
                api_name=endpoint,
            )
        except Exception as exc:
            if "Cannot find a function with api_name" not in str(exc):
                raise
            lookup_error = exc
        else:
            lookup_error = None
            break

    if lookup_error is not None:
        # Best effort: include the Space's endpoint listing in the error.
        available_hint = ""
        describe = getattr(client, "view_api", None)
        if callable(describe):
            try:
                api_info = describe(return_format="dict")
                available_hint = f" Available endpoints: {api_info}"
            except Exception:
                available_hint = ""
        attempted = ", ".join(endpoints)
        raise RuntimeError(f"No matching HF API endpoint. Tried: [{attempted}].{available_hint}") from lookup_error

    audio_bytes = _read_audio_bytes_from_source(_extract_audio_source(result))
    return _save_binary_audio(audio_bytes, out_path)
|
| 624 |
+
|
| 625 |
def _mock_generate_lecture(self, pdf_excerpt: str) -> str:
|
| 626 |
excerpt = re.sub(r"\s+", " ", pdf_excerpt).strip()
|
| 627 |
excerpt = excerpt[:1000]
|
| 628 |
return (
|
| 629 |
+
f" {excerpt}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 630 |
)
|
| 631 |
|
| 632 |
def _mock_generate_mcqs(self, lecture_text: str) -> List[MCQItem]:
|
|
|
|
| 739 |
def _real_tts_single(self, text: str, out_path: str) -> str:
|
| 740 |
if not text.strip():
|
| 741 |
return write_tone_wav("empty", out_path)
|
| 742 |
+
|
| 743 |
+
if _is_hf_tts_enabled():
|
| 744 |
+
try:
|
| 745 |
+
return self._hf_space_tts_single(text, out_path)
|
| 746 |
+
except Exception as exc:
|
| 747 |
+
if not HF_TTS_ALLOW_FALLBACK:
|
| 748 |
+
raise RuntimeError(f"HF Space TTS failed and fallback is disabled: {type(exc).__name__}: {exc}")
|
| 749 |
+
if self.mock_mode:
|
| 750 |
+
return write_tone_wav(text, out_path)
|
| 751 |
+
|
| 752 |
openai_url = f"{_require_api_url()}/audio/speech"
|
| 753 |
openai_payload = {
|
| 754 |
"model": TTS_MODEL_ID,
|
|
|
|
| 927 |
def synthesize_tts(self, text: str, name_prefix: str = "audio") -> str:
    """Synthesize ``text`` to a uniquely named .wav under TMP_DIR and return its path.

    A placeholder tone is written only when mock mode is on and no HF Space
    TTS is configured; otherwise the real TTS path is used.
    """
    self.ensure_tts_loaded()
    filename = f"{name_prefix}_{uuid.uuid4().hex}.wav"
    out_path = str(TMP_DIR / filename)
    use_tone = self.mock_mode and not _is_hf_tts_enabled()
    if use_tone:
        return write_tone_wav(text, out_path)
    return self._real_tts(text, out_path)
|
| 933 |
|
|
|
|
| 1716 |
if not state.get("lecture_text"):
|
| 1717 |
state["status"] = "No lecture text available."
|
| 1718 |
return state, state["status"], state.get("lecture_audio_path"), "Generate lecture first."
|
| 1719 |
+
backend = _tts_backend_name()
|
| 1720 |
try:
|
| 1721 |
+
state["status"] = f"Generating lecture audio ({backend})..."
|
| 1722 |
state["lecture_audio_path"] = engine.synthesize_tts(state["lecture_text"], name_prefix="lecture")
|
| 1723 |
state["status"] = "Lecture audio ready."
|
| 1724 |
+
return state, state["status"], state["lecture_audio_path"], f"Lecture audio generated via `{backend}`."
|
| 1725 |
except Exception as exc:
|
| 1726 |
state["status"] = "Lecture audio generation failed."
|
| 1727 |
+
return (
|
| 1728 |
+
state,
|
| 1729 |
+
state["status"],
|
| 1730 |
+
state.get("lecture_audio_path"),
|
| 1731 |
+
f"TTS error via `{backend}`: {type(exc).__name__}: {exc}",
|
| 1732 |
+
)
|
| 1733 |
|
| 1734 |
|
| 1735 |
def play_explanation_audio(state: Dict[str, Any]):
|
|
|
|
| 2600 |
)
|
| 2601 |
with gr.Row(elem_id="lecture-actions"):
|
| 2602 |
play_lecture_btn = gr.Button("Play Lecture Audio", interactive=False, scale=0)
|
| 2603 |
+
lecture_feedback = gr.Markdown("")
|
| 2604 |
with gr.Row(elem_id="exam-entry-wrap"):
|
| 2605 |
exam_btn = gr.Button("Go to Exam", interactive=False, variant="secondary", scale=0)
|
| 2606 |
|
|
|
|
| 2738 |
play_lecture_btn.click(
|
| 2739 |
fn=play_lecture_audio,
|
| 2740 |
inputs=[state],
|
| 2741 |
+
outputs=[state, status_box, lecture_audio, lecture_feedback],
|
| 2742 |
+
show_progress="minimal",
|
| 2743 |
)
|
| 2744 |
|
| 2745 |
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
gradio
|
|
|
|
| 2 |
spaces
|
| 3 |
requests
|
| 4 |
pypdf
|
|
|
|
| 1 |
gradio
|
| 2 |
+
gradio_client
|
| 3 |
spaces
|
| 4 |
requests
|
| 5 |
pypdf
|