akhaliq HF Staff committed on
Commit
00b19ca
·
1 Parent(s): 49d22cb

Migrate DramaBox to gradio.Server with a custom HTML/CSS/JS frontend

Browse files
Files changed (2) hide show
  1. app.py +42 -127
  2. index.html +1588 -0
app.py CHANGED
@@ -12,8 +12,13 @@ import tempfile
12
  import time
13
 
14
  import gradio as gr
 
 
 
 
15
  import spaces
16
 
 
17
  # Local src import.
18
  sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "src"))
19
  from inference_server import TTSServer # noqa: E402
@@ -164,149 +169,59 @@ EXAMPLES: list[tuple[str, str, str]] = [
164
  ]
165
 
166
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
167
  @spaces.GPU(duration=60)
168
- def on_generate(prompt: str, audio_ref, cfg: float, stg: float, dur_mult: float,
169
- gen_dur: float, ref_dur: float, seed: int):
 
 
 
 
 
 
 
 
170
  if not prompt or not prompt.strip():
171
  raise gr.Error("Prompt is empty.")
 
172
  t0 = time.time()
173
- ref_path = audio_ref if audio_ref and os.path.exists(str(audio_ref)) else None
 
174
  output = tempfile.mktemp(suffix=".wav", prefix="dramabox_")
175
  tts.generate_to_file(
176
  prompt=prompt,
177
  output=output,
178
  voice_ref=ref_path,
179
- cfg_scale=cfg, stg_scale=stg,
180
- duration_multiplier=dur_mult, seed=int(seed),
 
 
181
  gen_duration=float(gen_dur),
182
  ref_duration=float(ref_dur),
183
  )
184
  elapsed = time.time() - t0
185
  logging.info(f"Generated in {elapsed:.2f}s -> {output}")
186
- return output
187
-
188
-
189
- # ── UI ──────────────────────────────────────────────────────────────────────
190
- _BANNER_CSS = """
191
- .prompt-box textarea { font-size: 14px !important; line-height: 1.5 !important; }
192
- .ltx-banner {
193
- background: linear-gradient(90deg, #1a1f3a 0%, #2a1f3a 100%);
194
- border-left: 4px solid #ff6b35;
195
- padding: 10px 16px;
196
- margin: 0 0 12px 0;
197
- border-radius: 6px;
198
- color: #e8e8f0;
199
- font-size: 13px;
200
- line-height: 1.5;
201
- }
202
- .ltx-banner a { color: #ff9a6c; font-weight: 600; text-decoration: none; }
203
- .ltx-banner a:hover { text-decoration: underline; }
204
- .ltx-banner strong { color: #ffffff; }
205
- """
206
-
207
- with gr.Blocks(
208
- title="DramaBox — Expressive TTS",
209
- theme=gr.themes.Default(),
210
- css=_BANNER_CSS,
211
- analytics_enabled=False,
212
- ) as app:
213
- gr.Markdown("# 🎭 DramaBox — Expressive TTS with Voice Cloning")
214
- gr.HTML(
215
- '<div class="ltx-banner">'
216
- '🏗️&nbsp; Built on <a href="https://github.com/Lightricks/LTX-2">LTX-2</a> by '
217
- '<a href="https://huggingface.co/Lightricks">Lightricks</a>. '
218
- '<strong>DramaBox</strong> is <strong>Resemble AI\'s</strong> expressive TTS, '
219
- 'trained on top of the LTX-2.3 audio branch under the LTX-2 Community License. '
220
- 'Huge thanks to the Lightricks team for open-sourcing the base.'
221
- '</div>'
222
- )
223
- gr.Markdown(
224
- "Write a scene prompt, optionally upload a 10-second voice reference, "
225
- "and generate. Audio is automatically watermarked with "
226
- "[Resemble Perth](https://github.com/resemble-ai/Perth).\n\n"
227
- "**Tips:** put dialogue inside `\"double quotes\"`, scene directions outside. "
228
- "Phonetic sounds (`\"Hahaha\"`, `\"Mmmm\"`, `\"Ugh\"`) go inside quotes; named "
229
- "actions (`She sighs.`, `He clears his throat.`) go outside."
230
- )
231
-
232
- with gr.Row():
233
- with gr.Column(scale=3):
234
- prompt_box = gr.Textbox(
235
- label="Scene prompt",
236
- placeholder=EXAMPLES[0][2],
237
- lines=6, elem_classes=["prompt-box"],
238
- )
239
- audio_ref = gr.Audio(
240
- label="Voice reference (optional, 10+ seconds)",
241
- type="filepath",
242
- )
243
- gen_btn = gr.Button("Generate", variant="primary", size="lg")
244
-
245
- with gr.Column(scale=2):
246
- with gr.Accordion("Inference settings", open=False):
247
- cfg_slider = gr.Slider(1.0, 10.0, value=2.5, step=0.5, label="CFG scale")
248
- stg_slider = gr.Slider(0.0, 5.0, value=1.5, step=0.5, label="STG scale")
249
- dur_slider = gr.Slider(0.8, 2.0, value=1.1, step=0.05,
250
- label="Duration × (only used when target duration = 0)")
251
- gen_dur_slider = gr.Slider(0.0, 60.0, value=0.0, step=1.0,
252
- label="Target duration (s) — 0 = auto from prompt; "
253
- "set higher (≥20s) for long-form music or scenes")
254
- ref_dur_slider = gr.Slider(3.0, 30.0, value=10.0, step=1.0,
255
- label="Reference duration (s) — how many seconds of the "
256
- "uploaded voice reference the model conditions on")
257
- seed_input = gr.Number(value=42, label="Seed", precision=0)
258
- audio_out = gr.Audio(label="Generated audio", type="filepath")
259
- with gr.Accordion("Prompt writing guide", open=False):
260
- gr.Markdown(
261
- "**Structure:** `<speaker description>, \"<dialogue>\" <action> \"<more dialogue>\"`\n\n"
262
- "**Inside quotes** (model speaks them):\n"
263
- "- Dialogue: `\"Hello, how are you?\"`\n"
264
- "- Phonetic sounds: `\"Hahaha\"`, `\"Hehehe\"`, `\"Mmmmm\"`, `\"Ugh\"`, `\"Argh\"`\n\n"
265
- "**Outside quotes** (stage directions):\n"
266
- "- `She sighs deeply.`, `He gulps nervously.`, `A long pause.`\n"
267
- "- `Her voice cracks.`, `He clears his throat.`\n\n"
268
- "**Avoid inside quotes:** Ahem, Pfft, Sigh, Gasp, Cough — the model speaks them literally."
269
- )
270
-
271
- gen_btn.click(
272
- on_generate,
273
- inputs=[prompt_box, audio_ref, cfg_slider, stg_slider,
274
- dur_slider, gen_dur_slider, ref_dur_slider, seed_input],
275
- outputs=[audio_out],
276
- )
277
-
278
- # Click-to-generate example table. Each row preloads a paired voice
279
- # reference + prompt and runs the model immediately.
280
- gr.Examples(
281
- label="🎬 Click any row to generate a sample",
282
- examples=[
283
- # rows tagged "30s •" force a 30-second target duration; the rest
284
- # use the prompt-driven auto estimate (gen_dur = 0).
285
- [name, prompt, voice_path, 2.5, 1.5, 1.1,
286
- 30.0 if name.startswith("30s") else 0.0, 10.0, 42]
287
- for name, voice_path, prompt in EXAMPLES
288
- ],
289
- example_labels=[name for name, _, _ in EXAMPLES],
290
- inputs=[gr.Textbox(visible=False, label="Scene"),
291
- prompt_box, audio_ref,
292
- cfg_slider, stg_slider, dur_slider, gen_dur_slider,
293
- ref_dur_slider, seed_input],
294
- outputs=[audio_out],
295
- fn=lambda _name, prompt, ref, cfg, stg, dur, gen_dur, ref_dur, seed: on_generate(
296
- prompt, ref, cfg, stg, dur, gen_dur, ref_dur, seed),
297
- cache_examples=False,
298
- run_on_click=True,
299
- examples_per_page=20,
300
- )
301
 
302
 
303
  if __name__ == "__main__":
304
- # HF Spaces routes external traffic to container port 7860 by default.
305
- # Defaulting to 7861 caused the gateway to return 500 for every external request.
306
  port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
307
- app.queue(max_size=10).launch(
308
- server_name="0.0.0.0", server_port=port,
309
- share=os.environ.get("GRADIO_SHARE", "0") == "1",
310
- ssr_mode=False, # Gradio 5 SSR + ZeroGPU fork has known race conditions
311
- show_api=False, # don't auto-derive Python schemas (caused bool-iter / dict-cache crashes)
312
  )
 
 
12
  import time
13
 
14
  import gradio as gr
15
+ from fastapi.responses import HTMLResponse
16
+ from fastapi.staticfiles import StaticFiles
17
+ from gradio import Server
18
+ from gradio.data_classes import FileData
19
  import spaces
20
 
21
+
22
  # Local src import.
23
  sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "src"))
24
  from inference_server import TTSServer # noqa: E402
 
169
  ]
170
 
171
 
172
app = Server()

# Serve static voice files and images.
# The directory is resolved relative to this file rather than the process
# working directory, matching how index.html and the local "src" package are
# located, so the mount does not depend on where the app is launched from.
app.mount(
    "/assets",
    StaticFiles(
        directory=os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets")
    ),
    name="assets",
)
176
+
177
+
178
@app.get("/", response_class=HTMLResponse)
async def homepage():
    """Return the text of the ``index.html`` file that sits next to this module.

    The file is re-read from disk on every request and decoded as UTF-8.
    """
    module_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(module_dir, "index.html"), "r", encoding="utf-8") as page:
        markup = page.read()
    return markup
183
+
184
+
185
@app.api()
@spaces.GPU(duration=60)
def generate_audio(
    prompt: str,
    audio_ref: FileData | None,
    cfg: float,
    stg: float,
    dur_mult: float,
    gen_dur: float,
    ref_dur: float,
    seed: int
) -> FileData:
    """Synthesize speech for ``prompt`` and return the generated WAV file.

    Args:
        prompt: Scene prompt text; must contain non-whitespace characters.
        audio_ref: Optional voice reference. It is only used when its ``path``
            is set and exists on disk; otherwise no reference is passed.
        cfg: Forwarded to ``tts.generate_to_file`` as ``cfg_scale``.
        stg: Forwarded as ``stg_scale``.
        dur_mult: Forwarded as ``duration_multiplier``.
        gen_dur: Forwarded as ``gen_duration`` (coerced to ``float``).
        ref_dur: Forwarded as ``ref_duration`` (coerced to ``float``).
        seed: Forwarded as ``seed`` (coerced to ``int``).

    Returns:
        A ``FileData`` whose ``path`` points at the generated ``.wav`` file in
        the system temporary directory.

    Raises:
        gr.Error: If ``prompt`` is empty or whitespace-only.
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Prompt is empty.")

    t0 = time.time()
    ref_path = audio_ref.path if audio_ref and audio_ref.path and os.path.exists(audio_ref.path) else None

    # tempfile.mktemp() is deprecated: it only returns a name, so another
    # process can create that path first. mkstemp() creates the file atomically
    # with owner-only permissions; the descriptor is closed right away because
    # the TTS backend writes to the path itself.
    # NOTE(review): assumes tts.generate_to_file overwrites an existing empty
    # file at ``output`` -- confirm against the backend's writer.
    fd, output = tempfile.mkstemp(suffix=".wav", prefix="dramabox_")
    os.close(fd)
    try:
        tts.generate_to_file(
            prompt=prompt,
            output=output,
            voice_ref=ref_path,
            cfg_scale=cfg,
            stg_scale=stg,
            duration_multiplier=dur_mult,
            seed=int(seed),
            gen_duration=float(gen_dur),
            ref_duration=float(ref_dur),
        )
    except Exception:
        # Do not leave an empty placeholder behind when generation fails.
        # Cleanup is best-effort; the original error is what the caller needs.
        try:
            os.remove(output)
        except OSError:
            pass
        raise
    elapsed = time.time() - t0
    logging.info(f"Generated in {elapsed:.2f}s -> {output}")
    return FileData(path=output)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
 
219
 
220
if __name__ == "__main__":
    # The listening port comes from GRADIO_SERVER_PORT and falls back to 7860.
    port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": port,
        "show_error": True,
    }
    app.launch(**launch_options)
227
+
index.html ADDED
@@ -0,0 +1,1588 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>🎭 DramaBox — Expressive TTS with Voice Cloning</title>
7
+
8
+ <!-- Meta tags for premium look and SEO -->
9
+ <meta name="description" content="Generate highly expressive speech with voice cloning. Powered by LTX-2.3 and Resemble Perth watermarking.">
10
+ <meta name="theme-color" content="#0d0f17">
11
+
12
+ <!-- Google Fonts: Outfit and Inter -->
13
+ <link rel="preconnect" href="https://fonts.googleapis.com">
14
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
15
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=Outfit:wght@400;500;600;700;800&display=swap" rel="stylesheet">
16
+
17
+ <style>
18
+ :root {
19
+ --bg-color: #080a10;
20
+ --panel-bg: rgba(18, 22, 38, 0.7);
21
+ --border-color: rgba(255, 255, 255, 0.08);
22
+ --text-primary: #f1f3f9;
23
+ --text-secondary: #9aa0b9;
24
+ --accent-orange: #ff6b35;
25
+ --accent-orange-glow: rgba(255, 107, 53, 0.4);
26
+ --accent-purple: #8b5cf6;
27
+ --accent-purple-glow: rgba(139, 92, 246, 0.4);
28
+ --accent-green: #10b981;
29
+ --radius-lg: 16px;
30
+ --radius-md: 10px;
31
+ --transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1);
32
+ }
33
+
34
+ * {
35
+ box-sizing: border-box;
36
+ margin: 0;
37
+ padding: 0;
38
+ }
39
+
40
+ body {
41
+ background-color: var(--bg-color);
42
+ color: var(--text-primary);
43
+ font-family: 'Inter', sans-serif;
44
+ min-height: 100vh;
45
+ line-height: 1.6;
46
+ overflow-x: hidden;
47
+ position: relative;
48
+ }
49
+
50
+ /* Animated glowing background patterns */
51
+ body::before {
52
+ content: '';
53
+ position: absolute;
54
+ top: -20%;
55
+ left: -10%;
56
+ width: 60%;
57
+ height: 60%;
58
+ background: radial-gradient(circle, var(--accent-orange-glow) 0%, transparent 70%);
59
+ z-index: -1;
60
+ filter: blur(100px);
61
+ pointer-events: none;
62
+ opacity: 0.6;
63
+ }
64
+
65
+ body::after {
66
+ content: '';
67
+ position: absolute;
68
+ bottom: -10%;
69
+ right: -10%;
70
+ width: 50%;
71
+ height: 50%;
72
+ background: radial-gradient(circle, var(--accent-purple-glow) 0%, transparent 75%);
73
+ z-index: -1;
74
+ filter: blur(100px);
75
+ pointer-events: none;
76
+ opacity: 0.4;
77
+ }
78
+
79
+ header {
80
+ max-width: 1280px;
81
+ margin: 0 auto;
82
+ padding: 30px 20px 10px 20px;
83
+ display: flex;
84
+ flex-direction: column;
85
+ align-items: center;
86
+ text-align: center;
87
+ }
88
+
89
h1 {
    font-family: 'Outfit', sans-serif;
    font-size: 2.8rem;
    font-weight: 800;
    background: linear-gradient(135deg, #ffffff 40%, #ff8e53 70%, #d49aff 100%);
    /* Gradient text: keep the prefixed property for older WebKit/Blink and
       add the standard one so the rule does not rely on a vendor prefix. */
    -webkit-background-clip: text;
    background-clip: text;
    -webkit-text-fill-color: transparent;
    margin-bottom: 8px;
    letter-spacing: -1px;
    display: flex;
    align-items: center;
    gap: 12px;
}
102
+
103
+ .subtitle {
104
+ font-size: 1.1rem;
105
+ color: var(--text-secondary);
106
+ font-weight: 400;
107
+ max-width: 600px;
108
+ margin-bottom: 20px;
109
+ }
110
+
111
+ .ltx-banner {
112
+ background: linear-gradient(90deg, rgba(26, 31, 58, 0.8) 0%, rgba(42, 31, 58, 0.8) 100%);
113
+ border-left: 4px solid var(--accent-orange);
114
+ border-radius: var(--radius-md);
115
+ padding: 12px 20px;
116
+ color: #e8e8f0;
117
+ font-size: 0.9rem;
118
+ max-width: 900px;
119
+ margin: 0 auto 30px auto;
120
+ backdrop-filter: blur(10px);
121
+ border-top: 1px solid var(--border-color);
122
+ border-right: 1px solid var(--border-color);
123
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
124
+ text-align: left;
125
+ }
126
+
127
+ .ltx-banner a {
128
+ color: #ff9a6c;
129
+ font-weight: 600;
130
+ text-decoration: none;
131
+ transition: var(--transition);
132
+ }
133
+
134
+ .ltx-banner a:hover {
135
+ text-decoration: underline;
136
+ color: #ffffff;
137
+ }
138
+
139
+ .ltx-banner strong {
140
+ color: #ffffff;
141
+ }
142
+
143
+ main {
144
+ max-width: 1280px;
145
+ margin: 0 auto;
146
+ padding: 0 20px 60px 20px;
147
+ display: grid;
148
+ grid-template-columns: 1.2fr 1fr;
149
+ gap: 30px;
150
+ }
151
+
152
+ @media (max-width: 992px) {
153
+ main {
154
+ grid-template-columns: 1fr;
155
+ }
156
+ h1 {
157
+ font-size: 2.2rem;
158
+ }
159
+ }
160
+
161
+ .panel {
162
+ background: var(--panel-bg);
163
+ border: 1px solid var(--border-color);
164
+ border-radius: var(--radius-lg);
165
+ padding: 30px;
166
+ backdrop-filter: blur(16px);
167
+ box-shadow: 0 16px 40px rgba(0, 0, 0, 0.4);
168
+ display: flex;
169
+ flex-direction: column;
170
+ gap: 24px;
171
+ transition: var(--transition);
172
+ }
173
+
174
+ .panel:hover {
175
+ border-color: rgba(255, 255, 255, 0.12);
176
+ }
177
+
178
+ .panel-title {
179
+ font-family: 'Outfit', sans-serif;
180
+ font-size: 1.4rem;
181
+ font-weight: 700;
182
+ display: flex;
183
+ align-items: center;
184
+ gap: 10px;
185
+ color: #ffffff;
186
+ }
187
+
188
+ .form-group {
189
+ display: flex;
190
+ flex-direction: column;
191
+ gap: 8px;
192
+ }
193
+
194
+ .form-label {
195
+ font-size: 0.85rem;
196
+ font-weight: 600;
197
+ text-transform: uppercase;
198
+ letter-spacing: 0.5px;
199
+ color: var(--text-secondary);
200
+ display: flex;
201
+ justify-content: space-between;
202
+ }
203
+
204
+ .form-label .label-info {
205
+ font-size: 0.8rem;
206
+ color: var(--accent-orange);
207
+ text-transform: none;
208
+ font-weight: normal;
209
+ }
210
+
211
+ .textarea-custom {
212
+ width: 100%;
213
+ background: rgba(8, 10, 16, 0.6);
214
+ border: 1px solid var(--border-color);
215
+ border-radius: var(--radius-md);
216
+ padding: 16px;
217
+ color: var(--text-primary);
218
+ font-family: 'Inter', sans-serif;
219
+ font-size: 0.95rem;
220
+ line-height: 1.5;
221
+ resize: vertical;
222
+ min-height: 150px;
223
+ outline: none;
224
+ transition: var(--transition);
225
+ }
226
+
227
+ .textarea-custom:focus {
228
+ border-color: var(--accent-orange);
229
+ box-shadow: 0 0 15px rgba(255, 107, 53, 0.15);
230
+ background: rgba(8, 10, 16, 0.85);
231
+ }
232
+
233
+ /* Drag & Drop Audio Upload Uploader */
234
+ .upload-container {
235
+ border: 2px dashed rgba(255, 255, 255, 0.15);
236
+ border-radius: var(--radius-md);
237
+ padding: 24px;
238
+ text-align: center;
239
+ cursor: pointer;
240
+ background: rgba(255, 255, 255, 0.02);
241
+ transition: var(--transition);
242
+ display: flex;
243
+ flex-direction: column;
244
+ align-items: center;
245
+ justify-content: center;
246
+ gap: 10px;
247
+ min-height: 120px;
248
+ }
249
+
250
+ .upload-container:hover, .upload-container.dragover {
251
+ border-color: var(--accent-purple);
252
+ background: rgba(139, 92, 246, 0.05);
253
+ }
254
+
255
+ .upload-icon {
256
+ font-size: 2rem;
257
+ color: var(--text-secondary);
258
+ transition: var(--transition);
259
+ }
260
+
261
+ .upload-container:hover .upload-icon {
262
+ transform: translateY(-4px);
263
+ color: var(--accent-purple);
264
+ }
265
+
266
+ .upload-text {
267
+ font-size: 0.9rem;
268
+ color: var(--text-secondary);
269
+ }
270
+
271
+ .upload-text strong {
272
+ color: var(--text-primary);
273
+ }
274
+
275
+ .hidden-input {
276
+ display: none;
277
+ }
278
+
279
+ .uploaded-file-info {
280
+ display: flex;
281
+ align-items: center;
282
+ gap: 12px;
283
+ background: rgba(139, 92, 246, 0.1);
284
+ border: 1px solid rgba(139, 92, 246, 0.2);
285
+ padding: 10px 16px;
286
+ border-radius: var(--radius-md);
287
+ width: 100%;
288
+ justify-content: space-between;
289
+ }
290
+
291
+ .uploaded-file-details {
292
+ display: flex;
293
+ align-items: center;
294
+ gap: 10px;
295
+ font-size: 0.9rem;
296
+ font-weight: 500;
297
+ }
298
+
299
+ .clear-upload {
300
+ background: none;
301
+ border: none;
302
+ color: var(--text-secondary);
303
+ font-size: 1.1rem;
304
+ cursor: pointer;
305
+ transition: var(--transition);
306
+ padding: 2px;
307
+ display: flex;
308
+ align-items: center;
309
+ justify-content: center;
310
+ }
311
+
312
+ .clear-upload:hover {
313
+ color: #ffffff;
314
+ transform: scale(1.1);
315
+ }
316
+
317
+ /* Sliders / Advanced Settings */
318
+ .accordion-btn {
319
+ background: rgba(255, 255, 255, 0.03);
320
+ border: 1px solid var(--border-color);
321
+ border-radius: var(--radius-md);
322
+ padding: 12px 18px;
323
+ color: var(--text-primary);
324
+ font-weight: 600;
325
+ font-size: 0.9rem;
326
+ display: flex;
327
+ justify-content: space-between;
328
+ align-items: center;
329
+ cursor: pointer;
330
+ transition: var(--transition);
331
+ width: 100%;
332
+ outline: none;
333
+ }
334
+
335
+ .accordion-btn:hover {
336
+ background: rgba(255, 255, 255, 0.06);
337
+ border-color: rgba(255, 255, 255, 0.15);
338
+ }
339
+
340
+ .accordion-icon {
341
+ font-size: 0.8rem;
342
+ transition: var(--transition);
343
+ }
344
+
345
+ .accordion-content {
346
+ max-height: 0;
347
+ overflow: hidden;
348
+ transition: max-height 0.4s cubic-bezier(0.4, 0, 0.2, 1);
349
+ display: flex;
350
+ flex-direction: column;
351
+ gap: 18px;
352
+ padding: 0 4px;
353
+ }
354
+
355
+ .accordion-content.open {
356
+ max-height: 500px;
357
+ margin-top: 15px;
358
+ }
359
+
360
+ .slider-group {
361
+ display: flex;
362
+ flex-direction: column;
363
+ gap: 6px;
364
+ }
365
+
366
+ .slider-header {
367
+ display: flex;
368
+ justify-content: space-between;
369
+ font-size: 0.85rem;
370
+ color: var(--text-secondary);
371
+ font-weight: 500;
372
+ }
373
+
374
+ .slider-val {
375
+ color: var(--text-primary);
376
+ font-weight: 700;
377
+ }
378
+
379
input[type="range"] {
    /* Reset the native slider look; the standard property is listed after
       the prefixed one so engines that support both use the standard form. */
    -webkit-appearance: none;
    appearance: none;
    width: 100%;
    height: 6px;
    background: rgba(255, 255, 255, 0.08);
    border-radius: 3px;
    outline: none;
    transition: var(--transition);
}
388
+
389
+ input[type="range"]::-webkit-slider-thumb {
390
+ -webkit-appearance: none;
391
+ width: 16px;
392
+ height: 16px;
393
+ border-radius: 50%;
394
+ background: var(--accent-orange);
395
+ cursor: pointer;
396
+ box-shadow: 0 0 10px var(--accent-orange-glow);
397
+ transition: var(--transition);
398
+ }
399
+
400
+ input[type="range"]::-webkit-slider-thumb:hover {
401
+ transform: scale(1.2);
402
+ background: #ffffff;
403
+ }
404
+
405
+ .seed-input-container {
406
+ display: flex;
407
+ gap: 10px;
408
+ align-items: center;
409
+ }
410
+
411
+ .input-number {
412
+ flex: 1;
413
+ background: rgba(8, 10, 16, 0.6);
414
+ border: 1px solid var(--border-color);
415
+ border-radius: var(--radius-md);
416
+ padding: 10px;
417
+ color: var(--text-primary);
418
+ font-family: inherit;
419
+ outline: none;
420
+ transition: var(--transition);
421
+ }
422
+
423
+ .input-number:focus {
424
+ border-color: var(--accent-orange);
425
+ }
426
+
427
+ .btn-icon {
428
+ background: rgba(255, 255, 255, 0.05);
429
+ border: 1px solid var(--border-color);
430
+ border-radius: var(--radius-md);
431
+ color: var(--text-primary);
432
+ height: 42px;
433
+ width: 42px;
434
+ display: flex;
435
+ align-items: center;
436
+ justify-content: center;
437
+ cursor: pointer;
438
+ transition: var(--transition);
439
+ }
440
+
441
+ .btn-icon:hover {
442
+ background: rgba(255, 255, 255, 0.1);
443
+ border-color: rgba(255, 255, 255, 0.2);
444
+ }
445
+
446
+ /* Generate Button */
447
+ .btn-primary {
448
+ background: linear-gradient(135deg, var(--accent-orange) 0%, #ff8e53 100%);
449
+ border: none;
450
+ border-radius: var(--radius-md);
451
+ color: #ffffff;
452
+ font-family: 'Outfit', sans-serif;
453
+ font-size: 1.1rem;
454
+ font-weight: 700;
455
+ padding: 16px;
456
+ cursor: pointer;
457
+ transition: var(--transition);
458
+ display: flex;
459
+ align-items: center;
460
+ justify-content: center;
461
+ gap: 12px;
462
+ box-shadow: 0 8px 24px rgba(255, 107, 53, 0.25);
463
+ margin-top: 10px;
464
+ position: relative;
465
+ overflow: hidden;
466
+ }
467
+
468
+ .btn-primary:hover:not(:disabled) {
469
+ transform: translateY(-2px);
470
+ box-shadow: 0 12px 30px rgba(255, 107, 53, 0.4);
471
+ filter: brightness(1.05);
472
+ }
473
+
474
+ .btn-primary:active:not(:disabled) {
475
+ transform: translateY(0);
476
+ }
477
+
478
+ .btn-primary:disabled {
479
+ background: #252836;
480
+ color: #636882;
481
+ cursor: not-allowed;
482
+ box-shadow: none;
483
+ }
484
+
485
+ /* Right Panel: Output & Controls */
486
+ .output-card {
487
+ background: rgba(8, 10, 16, 0.4);
488
+ border: 1px solid var(--border-color);
489
+ border-radius: var(--radius-md);
490
+ padding: 24px;
491
+ display: flex;
492
+ flex-direction: column;
493
+ align-items: center;
494
+ justify-content: center;
495
+ min-height: 200px;
496
+ position: relative;
497
+ gap: 16px;
498
+ }
499
+
500
+ .output-empty {
501
+ color: var(--text-secondary);
502
+ text-align: center;
503
+ display: flex;
504
+ flex-direction: column;
505
+ align-items: center;
506
+ gap: 12px;
507
+ }
508
+
509
+ .output-empty-icon {
510
+ font-size: 2.5rem;
511
+ opacity: 0.5;
512
+ }
513
+
514
+ /* Beautiful Custom Audio Player */
515
+ .custom-player {
516
+ width: 100%;
517
+ display: flex;
518
+ flex-direction: column;
519
+ gap: 16px;
520
+ }
521
+
522
+ .visualizer-container {
523
+ width: 100%;
524
+ height: 60px;
525
+ background: linear-gradient(90deg, rgba(139, 92, 246, 0.05) 0%, rgba(255, 107, 53, 0.05) 100%);
526
+ border-radius: var(--radius-md);
527
+ display: flex;
528
+ align-items: center;
529
+ justify-content: center;
530
+ border: 1px solid rgba(255, 255, 255, 0.03);
531
+ position: relative;
532
+ overflow: hidden;
533
+ }
534
+
535
+ .visualizer-wave {
536
+ display: flex;
537
+ align-items: center;
538
+ gap: 3px;
539
+ height: 100%;
540
+ }
541
+
542
+ .wave-bar {
543
+ width: 3px;
544
+ height: 8px;
545
+ background: var(--accent-purple);
546
+ border-radius: 1px;
547
+ transition: var(--transition);
548
+ }
549
+
550
+ .custom-player.playing .wave-bar {
551
+ animation: bounce 1.2s infinite ease-in-out alternate;
552
+ }
553
+
554
+ /* Animation for visualizer bars */
555
+ @keyframes bounce {
556
+ 0% { height: 8px; }
557
+ 100% { height: 40px; }
558
+ }
559
+
560
+ .wave-bar:nth-child(2n) { background: var(--accent-orange); animation-delay: 0.15s; }
561
+ .wave-bar:nth-child(3n) { animation-delay: 0.3s; }
562
+ .wave-bar:nth-child(4n) { animation-delay: 0.45s; }
563
+ .wave-bar:nth-child(5n) { background: var(--accent-purple); animation-delay: 0.6s; }
564
+
565
+ .player-controls {
566
+ display: flex;
567
+ align-items: center;
568
+ gap: 16px;
569
+ width: 100%;
570
+ }
571
+
572
+ .play-btn {
573
+ background: #ffffff;
574
+ border: none;
575
+ color: var(--bg-color);
576
+ width: 44px;
577
+ height: 44px;
578
+ border-radius: 50%;
579
+ display: flex;
580
+ align-items: center;
581
+ justify-content: center;
582
+ font-size: 1.1rem;
583
+ cursor: pointer;
584
+ transition: var(--transition);
585
+ box-shadow: 0 4px 15px rgba(255, 255, 255, 0.2);
586
+ flex-shrink: 0;
587
+ }
588
+
589
+ .play-btn:hover {
590
+ transform: scale(1.05);
591
+ box-shadow: 0 6px 20px rgba(255, 255, 255, 0.4);
592
+ }
593
+
594
+ .time-slider-container {
595
+ flex: 1;
596
+ display: flex;
597
+ align-items: center;
598
+ gap: 10px;
599
+ }
600
+
601
+ .time-label {
602
+ font-size: 0.8rem;
603
+ color: var(--text-secondary);
604
+ font-family: monospace;
605
+ min-width: 35px;
606
+ }
607
+
608
+ .extra-player-controls {
609
+ display: flex;
610
+ align-items: center;
611
+ justify-content: space-between;
612
+ width: 100%;
613
+ border-top: 1px solid rgba(255, 255, 255, 0.05);
614
+ padding-top: 14px;
615
+ }
616
+
617
+ .volume-container {
618
+ display: flex;
619
+ align-items: center;
620
+ gap: 8px;
621
+ max-width: 120px;
622
+ }
623
+
624
+ .volume-icon {
625
+ font-size: 0.9rem;
626
+ color: var(--text-secondary);
627
+ }
628
+
629
+ .speed-control {
630
+ display: flex;
631
+ align-items: center;
632
+ gap: 6px;
633
+ }
634
+
635
+ .speed-btn {
636
+ background: rgba(255, 255, 255, 0.03);
637
+ border: 1px solid var(--border-color);
638
+ border-radius: 6px;
639
+ color: var(--text-secondary);
640
+ font-size: 0.75rem;
641
+ font-weight: 600;
642
+ padding: 4px 8px;
643
+ cursor: pointer;
644
+ transition: var(--transition);
645
+ }
646
+
647
+ .speed-btn.active, .speed-btn:hover {
648
+ background: rgba(255, 255, 255, 0.1);
649
+ color: #ffffff;
650
+ border-color: rgba(255, 255, 255, 0.25);
651
+ }
652
+
653
+ .btn-download {
654
+ background: rgba(255, 255, 255, 0.06);
655
+ border: 1px solid var(--border-color);
656
+ border-radius: var(--radius-md);
657
+ color: #ffffff;
658
+ font-size: 0.85rem;
659
+ font-weight: 600;
660
+ padding: 8px 16px;
661
+ text-decoration: none;
662
+ display: flex;
663
+ align-items: center;
664
+ gap: 8px;
665
+ transition: var(--transition);
666
+ }
667
+
668
+ .btn-download:hover {
669
+ background: #ffffff;
670
+ color: var(--bg-color);
671
+ transform: translateY(-1px);
672
+ }
673
+
674
+ /* Queue & Status Indicator */
675
.status-container {
    /* Hidden by default and shown dynamically. The earlier `display: flex`
       in this rule was always overridden by `display: none`, so it is
       removed; the flex child properties below stay for when the element
       is switched to a flex display. */
    align-items: center;
    gap: 12px;
    font-size: 0.9rem;
    font-weight: 500;
    padding: 12px 18px;
    border-radius: var(--radius-md);
    border: 1px solid transparent;
    display: none; /* Shown dynamically */
}
686
+
687
+ .status-container.success {
688
+ background: rgba(16, 185, 129, 0.1);
689
+ border-color: rgba(16, 185, 129, 0.2);
690
+ color: var(--accent-green);
691
+ }
692
+
693
+ .status-container.info {
694
+ background: rgba(139, 92, 246, 0.1);
695
+ border-color: rgba(139, 92, 246, 0.2);
696
+ color: #a78bfa;
697
+ }
698
+
699
+ .status-container.error {
700
+ background: rgba(239, 68, 68, 0.1);
701
+ border-color: rgba(239, 68, 68, 0.2);
702
+ color: #f87171;
703
+ }
704
+
705
+ .spinner {
706
+ width: 18px;
707
+ height: 18px;
708
+ border: 2px solid rgba(255, 255, 255, 0.1);
709
+ border-top: 2px solid currentColor;
710
+ border-radius: 50%;
711
+ animation: spin 0.8s linear infinite;
712
+ }
713
+
714
+ @keyframes spin {
715
+ 0% { transform: rotate(0deg); }
716
+ 100% { transform: rotate(360deg); }
717
+ }
718
+
719
+ /* Examples Section */
720
+ .examples-section {
721
+ display: flex;
722
+ flex-direction: column;
723
+ gap: 14px;
724
+ }
725
+
726
+ .examples-title {
727
+ font-family: 'Outfit', sans-serif;
728
+ font-size: 1.1rem;
729
+ font-weight: 700;
730
+ color: #ffffff;
731
+ display: flex;
732
+ align-items: center;
733
+ gap: 8px;
734
+ }
735
+
736
+ .examples-list {
737
+ display: flex;
738
+ flex-direction: column;
739
+ gap: 10px;
740
+ max-height: 380px;
741
+ overflow-y: auto;
742
+ padding-right: 4px;
743
+ }
744
+
745
+ /* Custom scrollbar for examples */
746
+ .examples-list::-webkit-scrollbar {
747
+ width: 6px;
748
+ }
749
+
750
+ .examples-list::-webkit-scrollbar-track {
751
+ background: rgba(255, 255, 255, 0.02);
752
+ border-radius: 3px;
753
+ }
754
+
755
+ .examples-list::-webkit-scrollbar-thumb {
756
+ background: rgba(255, 255, 255, 0.1);
757
+ border-radius: 3px;
758
+ }
759
+
760
+ .example-item {
761
+ background: rgba(255, 255, 255, 0.02);
762
+ border: 1px solid var(--border-color);
763
+ border-radius: var(--radius-md);
764
+ padding: 12px 16px;
765
+ cursor: pointer;
766
+ transition: var(--transition);
767
+ display: flex;
768
+ flex-direction: column;
769
+ gap: 6px;
770
+ text-align: left;
771
+ }
772
+
773
+ .example-item:hover {
774
+ background: rgba(255, 255, 255, 0.06);
775
+ border-color: rgba(255, 255, 255, 0.15);
776
+ transform: translateX(2px);
777
+ }
778
+
779
+ .example-item.active {
780
+ border-color: var(--accent-orange);
781
+ background: rgba(255, 107, 53, 0.04);
782
+ }
783
+
784
+ .example-header {
785
+ display: flex;
786
+ justify-content: space-between;
787
+ align-items: center;
788
+ }
789
+
790
+ .example-name {
791
+ font-weight: 600;
792
+ font-size: 0.9rem;
793
+ color: #ffffff;
794
+ }
795
+
796
+ .example-badges {
797
+ display: flex;
798
+ gap: 6px;
799
+ }
800
+
801
+ .badge {
802
+ font-size: 0.7rem;
803
+ font-weight: 700;
804
+ padding: 2px 6px;
805
+ border-radius: 4px;
806
+ text-transform: uppercase;
807
+ }
808
+
809
+ .badge-male { background: rgba(59, 130, 246, 0.15); color: #60a5fa; }
810
+ .badge-female { background: rgba(236, 72, 153, 0.15); color: #f472b6; }
811
+ .badge-long { background: rgba(139, 92, 246, 0.15); color: #a78bfa; }
812
+
813
+ .example-preview {
814
+ font-size: 0.8rem;
815
+ color: var(--text-secondary);
816
+ overflow: hidden;
817
+ text-overflow: ellipsis;
818
+ white-space: nowrap;
819
+ }
820
+
821
+ /* Prompt Writing Guide Styles */
822
+ .guide-container {
823
+ border-top: 1px solid rgba(255, 255, 255, 0.08);
824
+ padding-top: 24px;
825
+ display: flex;
826
+ flex-direction: column;
827
+ gap: 14px;
828
+ }
829
+
830
+ .guide-header {
831
+ font-size: 0.95rem;
832
+ font-weight: 600;
833
+ color: #ffffff;
834
+ cursor: pointer;
835
+ display: flex;
836
+ justify-content: space-between;
837
+ align-items: center;
838
+ user-select: none;
839
+ }
840
+
841
+ .guide-body {
842
+ max-height: 0;
843
+ overflow: hidden;
844
+ transition: max-height 0.4s cubic-bezier(0.4, 0, 0.2, 1);
845
+ font-size: 0.85rem;
846
+ color: var(--text-secondary);
847
+ display: flex;
848
+ flex-direction: column;
849
+ gap: 12px;
850
+ line-height: 1.5;
851
+ }
852
+
853
+ .guide-body.open {
854
+ max-height: 400px;
855
+ margin-top: 10px;
856
+ }
857
+
858
+ .guide-section-title {
859
+ color: #ffffff;
860
+ font-weight: 600;
861
+ margin-bottom: 2px;
862
+ font-size: 0.85rem;
863
+ }
864
+
865
+ .guide-list {
866
+ padding-left: 18px;
867
+ display: flex;
868
+ flex-direction: column;
869
+ gap: 4px;
870
+ }
871
+ </style>
872
+ </head>
873
+ <body>
874
+
875
+ <header>
876
+ <h1>🎭 DramaBox</h1>
877
+ <div class="subtitle">Expressive TTS with Voice Cloning</div>
878
+ <div class="ltx-banner">
879
+ 🏗️&nbsp; Built on <a href="https://github.com/Lightricks/LTX-2" target="_blank">LTX-2</a> by
880
+ <a href="https://huggingface.co/Lightricks" target="_blank">Lightricks</a>.
881
+ <strong>DramaBox</strong> is <strong>Resemble AI's</strong> expressive TTS,
882
+ trained on top of the LTX-2.3 audio branch under the LTX-2 Community License.
883
+ Huge thanks to the Lightricks team for open-sourcing the base.
884
+ </div>
885
+ </header>
886
+
887
+ <main>
888
+ <!-- Left Panel: Form Inputs -->
889
+ <section class="panel">
890
+ <div class="panel-title">
891
+ <span>🪄</span> Voice Generator
892
+ </div>
893
+
894
+ <!-- Scene Prompt Input -->
895
+ <div class="form-group">
896
+ <div class="form-label">
897
+ <span>Scene Prompt</span>
898
+ <span class="label-info">Put speech in "quotes", directions outside</span>
899
+ </div>
900
+ <textarea
901
+ id="scene-prompt"
902
+ class="textarea-custom"
903
+ placeholder='A shadowy villain speaks with cold menace, "You have entered my domain, mortal." He chuckles darkly, "Such arrogance will be your undoing."'
904
+ ></textarea>
905
+ </div>
906
+
907
+ <!-- Voice Reference Uploader -->
908
+ <div class="form-group">
909
+ <div class="form-label">
910
+ <span>Voice Reference (Optional)</span>
911
+ <span>10+ seconds recommended</span>
912
+ </div>
913
+
914
+ <div id="dropzone" class="upload-container">
915
+ <span class="upload-icon">📤</span>
916
+ <div class="upload-text">
917
+ <strong>Click to upload</strong> or drag & drop<br>
918
+ <span>Supports MP3, WAV, M4A, etc.</span>
919
+ </div>
920
+ </div>
921
+ <input type="file" id="audio-file" class="hidden-input" accept="audio/*">
922
+ </div>
923
+
924
+ <!-- Inference Settings Accordion -->
925
+ <div>
926
+ <button type="button" class="accordion-btn" id="accordion-toggle">
927
+ <span>⚙️ Advanced Settings</span>
928
+ <span class="accordion-icon" id="accordion-arrow">▼</span>
929
+ </button>
930
+
931
+ <div class="accordion-content" id="accordion-panel">
932
+ <!-- CFG Scale -->
933
+ <div class="slider-group">
934
+ <div class="slider-header">
935
+ <span>CFG Scale</span>
936
+ <span class="slider-val" id="val-cfg">2.5</span>
937
+ </div>
938
+ <input type="range" id="cfg" min="1.0" max="10.0" step="0.5" value="2.5">
939
+ </div>
940
+
941
+ <!-- STG Scale -->
942
+ <div class="slider-group">
943
+ <div class="slider-header">
944
+ <span>STG Scale</span>
945
+ <span class="slider-val" id="val-stg">1.5</span>
946
+ </div>
947
+ <input type="range" id="stg" min="0.0" max="5.0" step="0.5" value="1.5">
948
+ </div>
949
+
950
+ <!-- Duration Multiplier -->
951
+ <div class="slider-group">
952
+ <div class="slider-header">
953
+ <span>Duration Multiplier</span>
954
+ <span class="slider-val" id="val-dur">1.10</span>
955
+ </div>
956
+ <input type="range" id="dur" min="0.8" max="2.0" step="0.05" value="1.1">
957
+ </div>
958
+
959
+ <!-- Target Duration -->
960
+ <div class="slider-group">
961
+ <div class="slider-header">
962
+ <span>Target Duration (s) — 0 = Auto</span>
963
+ <span class="slider-val" id="val-gendur">0.0</span>
964
+ </div>
965
+ <input type="range" id="gendur" min="0.0" max="60.0" step="1.0" value="0.0">
966
+ </div>
967
+
968
+ <!-- Reference Duration -->
969
+ <div class="slider-group">
970
+ <div class="slider-header">
971
+ <span>Reference Duration (s)</span>
972
+ <span class="slider-val" id="val-refdur">10.0</span>
973
+ </div>
974
+ <input type="range" id="refdur" min="3.0" max="30.0" step="1.0" value="10.0">
975
+ </div>
976
+
977
+ <!-- Seed Input -->
978
+ <div class="form-group">
979
+ <span class="form-label">Generation Seed</span>
980
+ <div class="seed-input-container">
981
+ <input type="number" id="seed" class="input-number" value="42">
982
+ <button type="button" id="btn-random-seed" class="btn-icon" title="Randomize Seed">🎲</button>
983
+ </div>
984
+ </div>
985
+ </div>
986
+ </div>
987
+
988
+ <!-- Generate Button -->
989
+ <button id="btn-generate" class="btn-primary">
990
+ <span>⚡</span> Generate Speech
991
+ </button>
992
+ </section>
993
+
994
+ <!-- Right Panel: Output & Examples -->
995
+ <section class="panel">
996
+ <div class="panel-title">
997
+ <span>🔊</span> Output Room
998
+ </div>
999
+
1000
+ <!-- Status Indicator -->
1001
+ <div id="status-box" class="status-container">
1002
+ <div class="spinner"></div>
1003
+ <span id="status-text">Connecting to DramaBox engine...</span>
1004
+ </div>
1005
+
1006
+ <!-- Output Container -->
1007
+ <div class="output-card">
1008
+ <audio id="audio-element" style="display:none;"></audio>
1009
+
1010
+ <!-- Empty State -->
1011
+ <div id="output-empty-state" class="output-empty">
1012
+ <span class="output-empty-state-icon" style="font-size: 3rem;">🎛️</span>
1013
+ <p>Enter a prompt and hit generate to voice your script</p>
1014
+ </div>
1015
+
1016
+ <!-- Custom Elegant Audio Player (Initially hidden) -->
1017
+ <div id="custom-player" class="custom-player" style="display: none;">
1018
+ <div class="visualizer-container">
1019
+ <div class="visualizer-wave">
1020
+ <span class="wave-bar"></span>
1021
+ <span class="wave-bar"></span>
1022
+ <span class="wave-bar"></span>
1023
+ <span class="wave-bar"></span>
1024
+ <span class="wave-bar"></span>
1025
+ <span class="wave-bar"></span>
1026
+ <span class="wave-bar"></span>
1027
+ <span class="wave-bar"></span>
1028
+ <span class="wave-bar"></span>
1029
+ <span class="wave-bar"></span>
1030
+ <span class="wave-bar"></span>
1031
+ <span class="wave-bar"></span>
1032
+ <span class="wave-bar"></span>
1033
+ <span class="wave-bar"></span>
1034
+ <span class="wave-bar"></span>
1035
+ <span class="wave-bar"></span>
1036
+ <span class="wave-bar"></span>
1037
+ <span class="wave-bar"></span>
1038
+ <span class="wave-bar"></span>
1039
+ <span class="wave-bar"></span>
1040
+ </div>
1041
+ </div>
1042
+
1043
+ <div class="player-controls">
1044
+ <button type="button" id="player-play" class="play-btn">▶</button>
1045
+ <div class="time-slider-container">
1046
+ <span id="player-current-time" class="time-label">00:00</span>
1047
+ <input type="range" id="player-progress" min="0" max="100" value="0">
1048
+ <span id="player-duration" class="time-label">00:00</span>
1049
+ </div>
1050
+ </div>
1051
+
1052
+ <div class="extra-player-controls">
1053
+ <!-- Volume -->
1054
+ <div class="volume-container">
1055
+ <span class="volume-icon">🔊</span>
1056
+ <input type="range" id="player-volume" min="0" max="1" step="0.1" value="0.8">
1057
+ </div>
1058
+
1059
+ <!-- Speed Controls -->
1060
+ <div class="speed-control">
1061
+ <button type="button" class="speed-btn" data-speed="0.8">0.8x</button>
1062
+ <button type="button" class="speed-btn active" data-speed="1.0">1.0x</button>
1063
+ <button type="button" class="speed-btn" data-speed="1.2">1.2x</button>
1064
+ <button type="button" class="speed-btn" data-speed="1.5">1.5x</button>
1065
+ </div>
1066
+
1067
+ <!-- Download Link -->
1068
+ <a id="player-download" class="btn-download" href="#" download="dramabox_voice.wav">
1069
+ <span>📥</span> Download
1070
+ </a>
1071
+ </div>
1072
+ </div>
1073
+ </div>
1074
+
1075
+ <!-- Example Section -->
1076
+ <div class="examples-section">
1077
+ <div class="examples-title">
1078
+ <span>🎬</span> Quick Demos
1079
+ </div>
1080
+ <div class="examples-list" id="examples-container">
1081
+ <!-- Loaded dynamically in JavaScript -->
1082
+ </div>
1083
+ </div>
1084
+
1085
+ <!-- Prompt Writing Guide -->
1086
+ <div class="guide-container">
1087
+ <div class="guide-header" id="guide-toggle">
1088
+ <span>📖 Prompt Writing Guide</span>
1089
+ <span class="accordion-icon" id="guide-arrow">▼</span>
1090
+ </div>
1091
+
1092
+ <div class="guide-body" id="guide-body">
1093
+ <div>
1094
+ <div class="guide-section-title">Structure Pattern</div>
1095
+ <p><code>&lt;speaker description&gt;, "&lt;dialogue&gt;" &lt;action&gt; "&lt;more dialogue&gt;"</code></p>
1096
+ </div>
1097
+ <div>
1098
+ <div class="guide-section-title">Inside Double Quotes (Spoken)</div>
1099
+ <ul class="guide-list">
1100
+ <li>Standard speech dialogue: <code>"Hello, how are you today?"</code></li>
1101
+ <li>Phonetic sounds and laughs: <code>"Hahaha"</code>, <code>"Hehehe"</code>, <code>"Mmmmm"</code>, <code>"Ugh"</code>, <code>"Argh"</code></li>
1102
+ </ul>
1103
+ </div>
1104
+ <div>
1105
+ <div class="guide-section-title">Outside Double Quotes (Stage Directions)</div>
1106
+ <ul class="guide-list">
1107
+ <li>Physical movements and breaths: <code>She sighs deeply.</code>, <code>He gulps nervously.</code>, <code>A long pause.</code></li>
1108
+ <li>Voice adjustments: <code>Her voice cracks.</code>, <code>He clears his throat.</code></li>
1109
+ </ul>
1110
+ </div>
1111
+ <div>
1112
+ <div class="guide-section-title">Common Pitfalls</div>
1113
+ <p>Avoid putting explicit words like <i>Sigh</i>, <i>Cough</i>, or <i>Gasp</i> inside quotes. The model will literally pronounce them. Put them outside quotes as directions!</p>
1114
+ </div>
1115
+ </div>
1116
+ </div>
1117
+ </section>
1118
+ </main>
1119
+
1120
+ <!-- Connect Gradio Client via Module Script -->
1121
+ <script type="module">
1122
+ import { Client, handle_file } from "https://cdn.jsdelivr.net/npm/@gradio/client/dist/index.min.js";
1123
+
1124
+ // Examples Dataset
1125
+ const EXAMPLES = [
1126
+ {
1127
+ name: "Villain monologue",
1128
+ gender: "male",
1129
+ badge: "Male voice",
1130
+ voice: "/assets/voices/male_harvey_keitel.mp3",
1131
+ prompt: 'A shadowy villain speaks with cold menace, "You have entered my domain, mortal." He chuckles darkly, "Such arrogance will be your undoing." His voice rises with fury, "Kneel, or be destroyed where you stand!"',
1132
+ duration: 0.0
1133
+ },
1134
+ {
1135
+ name: "Talk-show host wheeze-laugh",
1136
+ gender: "male",
1137
+ badge: "Wheeze laugh",
1138
+ voice: "/assets/voices/male_conan.mp3",
1139
+ prompt: 'A talk show host gasps with shock, "No! You did NOT just say that!" He bursts into uncontrollable laughter, "Hahaha! Oh my god, oh my god!" He wheezes, "I cannot, I literally cannot breathe right now!"',
1140
+ duration: 0.0
1141
+ },
1142
+ {
1143
+ name: "Tender goodnight whisper",
1144
+ gender: "female",
1145
+ badge: "Whisper",
1146
+ voice: "/assets/voices/female_shadowheart.wav",
1147
+ prompt: 'A woman speaks tenderly, "It has been a long day, my love." She whispers, "Close your eyes. I am right here." She hums quietly, "Mmmm-mmm. Sleep now."',
1148
+ duration: 0.0
1149
+ },
1150
+ {
1151
+ name: "Old-school radio anchor",
1152
+ gender: "male",
1153
+ badge: "Anchor",
1154
+ voice: "/assets/voices/male_old_movie.wav",
1155
+ prompt: 'A radio host clears his throat, "Excuse me, pardon that." He settles into a warm, professional tone, "Good evening everyone, and welcome back to the show. We have got a wonderful lineup tonight."',
1156
+ duration: 0.0
1157
+ },
1158
+ {
1159
+ name: "Catgirl uncontrollable giggling",
1160
+ gender: "female",
1161
+ badge: "Giggling",
1162
+ voice: "/assets/voices/female_american.wav",
1163
+ prompt: 'A playful girl already mid-giggle, "Hehehe, oh my gosh you should see your face!" She gasps for air between giggles, "Oh my, hehe, oh my, I cannot stop!" She tries to compose herself, "Ahhhhh okay okay okay, I will stop, I promise."',
1164
+ duration: 0.0
1165
+ },
1166
+ {
1167
+ name: "Hero stammering courage",
1168
+ gender: "male",
1169
+ badge: "Heroic",
1170
+ voice: "/assets/voices/male_arnie.mp3",
1171
+ prompt: 'A young warrior speaks with a trembling voice, "I... I do not know if I can do this." He takes a shaky breath, "But someone has to try." His voice steadies with growing fire, "No more running. I WILL fight!"',
1172
+ duration: 0.0
1173
+ },
1174
+ {
1175
+ name: "Exhausted dad, fraying patience",
1176
+ gender: "male",
1177
+ badge: "Frustrated",
1178
+ voice: "/assets/voices/male_petergriffin.wav",
1179
+ prompt: 'An exhausted father speaks with fraying patience, "Sweetie, daddy is asking very nicely." He sighs deeply, "Ohhhh my goodness." He puts on an overly cheerful voice, "Hey buddy! Look at the shiny thing!" Then he laughs helplessly, "Hahaha, I am losing my mind."',
1180
+ duration: 0.0
1181
+ },
1182
+ {
1183
+ name: "Smug-confident announcer",
1184
+ gender: "male",
1185
+ badge: "Announcer",
1186
+ voice: "/assets/voices/male_samuel_j.mp3",
1187
+ prompt: 'A confident announcer speaks proudly, "And now, the moment you have all been waiting for." He chuckles knowingly, "Heheh, trust me, this one is going to blow you away."',
1188
+ duration: 0.0
1189
+ },
1190
+ {
1191
+ name: "30s • Villain soliloquy",
1192
+ gender: "male",
1193
+ badge: "30s clip",
1194
+ voice: "/assets/voices/male_harvey_keitel.mp3",
1195
+ prompt: 'A shadowy villain stands at the edge of his throne room, gazing into the dark. He speaks with slow, measured menace, "So, the little hero has come to finish me, has he?" He chuckles low and humourless, "Hehe, oh how delightfully predictable you mortals are." His voice hardens into ice, "I have lived ten thousand years. I have seen empires rise and fall like the tide." He scoffs, "And you think you, with your borrowed sword and your trembling hands, will be the one to end me?" A long pause. He whispers, almost tenderly, "I will give you a single chance to turn around and walk away." Then his voice rises with crushing finality, "Choose, child. The door behind you, or the grave at your feet."',
1196
+ duration: 30.0
1197
+ },
1198
+ {
1199
+ name: "30s • Late-night radio monologue",
1200
+ gender: "male",
1201
+ badge: "30s clip",
1202
+ voice: "/assets/voices/male_old_movie.wav",
1203
+ prompt: 'A radio host clears his throat softly into the microphone in the late hours of the night. He settles into a warm, smoky tone, "Good evening, dear listeners, and welcome back to the After Hours Hour." He sighs contentedly, "Mmm, what a night it has been. The rain is tapping at my window like an old friend." He chuckles softly, "Heheh, you know the kind of friend, the one that always shows up unannounced." His voice drops, intimate, "I want you to lean back, wherever you are. Pour yourself something warm." He pauses, breath audible, "Tonight we are going to talk about love, and loss, and the songs that hold us together." A smile in his voice, "And I have got the perfect record cued up to start us off, so stay right where you are."',
1204
+ duration: 30.0
1205
+ },
1206
+ {
1207
+ name: "30s • Bedtime story",
1208
+ gender: "female",
1209
+ badge: "30s clip",
1210
+ voice: "/assets/voices/female_shadowheart.wav",
1211
+ prompt: 'A mother sits at the edge of her child\'s bed in the dim glow of a single lamp. She speaks softly, "Once upon a time, in a kingdom by the sea, there lived a small dragon named Pip." She lowers her voice playfully, "Now Pip was not like the other dragons. Pip was afraid of fire." She smiles warmly, "Mmm, can you imagine? A dragon who was afraid of his own breath?" A gentle pause, "But Pip had something the other dragons did not have. Pip had courage in his heart." She hums softly, "Mmmmm. And one cold winter night, when the village below ran out of warmth..." Her voice drops to a whisper, "Pip closed his eyes, took a deep, deep breath, and remembered who he was."',
1212
+ duration: 30.0
1213
+ }
1214
+ ];
1215
+
1216
+ let client = null;
1217
+ let selectedAudioFile = null;
1218
+ let selectedAudioFilename = "";
1219
+
1220
+ // Elements
1221
+ const statusBox = document.getElementById("status-box");
1222
+ const statusText = document.getElementById("status-text");
1223
+ const btnGenerate = document.getElementById("btn-generate");
1224
+
1225
+ const scenePrompt = document.getElementById("scene-prompt");
1226
+ const dropzone = document.getElementById("dropzone");
1227
+ const audioFileInput = document.getElementById("audio-file");
1228
+
1229
+ // Advanced Accordion
1230
+ const accordionToggle = document.getElementById("accordion-toggle");
1231
+ const accordionPanel = document.getElementById("accordion-panel");
1232
+ const accordionArrow = document.getElementById("accordion-arrow");
1233
+
1234
+ // Sliders
1235
+ const sliderCfg = document.getElementById("cfg");
1236
+ const valCfg = document.getElementById("val-cfg");
1237
+ const sliderStg = document.getElementById("stg");
1238
+ const valStg = document.getElementById("val-stg");
1239
+ const sliderDur = document.getElementById("dur");
1240
+ const valDur = document.getElementById("val-dur");
1241
+ const sliderGenDur = document.getElementById("gendur");
1242
+ const valGenDur = document.getElementById("val-gendur");
1243
+ const sliderRefDur = document.getElementById("refdur");
1244
+ const valRefDur = document.getElementById("val-refdur");
1245
+
1246
+ const inputSeed = document.getElementById("seed");
1247
+ const btnRandomSeed = document.getElementById("btn-random-seed");
1248
+
1249
+ // Player Elements
1250
+ const audioElement = document.getElementById("audio-element");
1251
+ const outputEmptyState = document.getElementById("output-empty-state");
1252
+ const customPlayer = document.getElementById("custom-player");
1253
+ const playerPlay = document.getElementById("player-play");
1254
+ const playerProgress = document.getElementById("player-progress");
1255
+ const playerCurrentTime = document.getElementById("player-current-time");
1256
+ const playerDuration = document.getElementById("player-duration");
1257
+ const playerVolume = document.getElementById("player-volume");
1258
+ const playerDownload = document.getElementById("player-download");
1259
+ const speedButtons = document.querySelectorAll(".speed-btn");
1260
+
1261
+ // Guide Accordion
1262
+ const guideToggle = document.getElementById("guide-toggle");
1263
+ const guideBody = document.getElementById("guide-body");
1264
+ const guideArrow = document.getElementById("guide-arrow");
1265
+
1266
+ // Status Updater
1267
/**
 * Show the status banner with a message.
 *
 * @param {string} message - Text written into the status label.
 * @param {string} [type="info"] - Variant appended to the banner's class
 *   list (the stylesheet defines at least an `error` variant).
 * @param {boolean} [showLoading=true] - Whether the spinner is visible.
 */
function updateStatus(message, type = "info", showLoading = true) {
  statusBox.style.display = "flex";
  statusBox.className = `status-container ${type}`;
  statusText.innerText = message;

  // The spinner lives inside the banner; toggle it per call.
  const spinnerEl = statusBox.querySelector(".spinner");
  spinnerEl.style.display = showLoading ? "block" : "none";
}
1279
+
1280
/** Hide the status banner (used directly as a `setTimeout` callback). */
function hideStatus() {
  const { style: bannerStyle } = statusBox;
  bannerStyle.display = "none";
}
1283
+
1284
+ // Initialize Gradio JS Client
1285
/**
 * Connect the Gradio JS client to the server that served this page and
 * store it in the module-level `client` binding.
 *
 * On success the "ready" banner is hidden after 3 seconds; on failure the
 * error is logged and an error banner stays visible.
 */
async function connectClient() {
  try {
    updateStatus("Connecting to DramaBox server...", "info");

    const serverOrigin = window.location.origin;
    const connected = await Client.connect(serverOrigin);
    client = connected;

    updateStatus("Engine connected and ready", "success", false);
    setTimeout(hideStatus, 3000);
  } catch (connectError) {
    console.error(connectError);
    updateStatus("Engine connection failed. Please reload.", "error", false);
  }
}
1296
+
1297
+ // Accordion Management
1298
// Both collapsible sections behave the same way: clicking the trigger
// toggles the `open` class on the panel and flips the arrow glyph.
const bindArrowToggle = (trigger, panel, arrow) => {
  trigger.addEventListener("click", () => {
    const nowOpen = panel.classList.toggle("open");
    arrow.innerText = nowOpen ? "▲" : "▼";
  });
};

bindArrowToggle(accordionToggle, accordionPanel, accordionArrow);
bindArrowToggle(guideToggle, guideBody, guideArrow);
1307
+
1308
+ // Sliders Realtime Output updates
1309
// Mirror each range input into its numeric readout while it is dragged.
// Each row is [slider, readout element, decimals shown].
[
  [sliderCfg, valCfg, 1],
  [sliderStg, valStg, 1],
  [sliderDur, valDur, 2],
  [sliderGenDur, valGenDur, 1],
  [sliderRefDur, valRefDur, 1],
].forEach(([slider, readout, decimals]) => {
  slider.addEventListener("input", (event) => {
    readout.innerText = parseFloat(event.target.value).toFixed(decimals);
  });
});
1314
+
1315
+ // Seed Randomizer
1316
// Dice button: write a fresh random seed into the seed input and spin the
// button once as visual feedback.
btnRandomSeed.addEventListener("click", () => {
  inputSeed.value = Math.floor(Math.random() * 99999999);

  const { style: diceStyle } = btnRandomSeed;
  diceStyle.transform = "rotate(360deg)";
  setTimeout(() => {
    diceStyle.transform = "none";
  }, 400);
});
1322
+
1323
+ // Uploader Handlers
1324
// Clicking the drop zone forwards to the hidden <input type="file">.
dropzone.addEventListener("click", () => {
  audioFileInput.click();
});

// preventDefault on dragover is what allows the element to receive `drop`.
dropzone.addEventListener("dragover", (event) => {
  event.preventDefault();
  dropzone.classList.add("dragover");
});

dropzone.addEventListener("dragleave", () => {
  dropzone.classList.remove("dragover");
});

dropzone.addEventListener("drop", (event) => {
  event.preventDefault();
  dropzone.classList.remove("dragover");

  const dropped = event.dataTransfer.files;
  if (dropped.length === 0) return;
  // Only the first dropped file is used as the voice reference.
  handleUploadedFile(dropped[0]);
});

audioFileInput.addEventListener("change", (event) => {
  const picked = event.target.files;
  if (picked.length === 0) return;
  handleUploadedFile(picked[0]);
});
1349
+
1350
/**
 * Record `file` as the current voice reference and show it in the drop zone.
 *
 * @param {File} file - Audio file chosen via the picker or drag & drop.
 */
function handleUploadedFile(file) {
  selectedAudioFile = file;

  const uploadedName = file.name;
  selectedAudioFilename = uploadedName;
  renderUploadedUI(uploadedName);
}
1355
+
1356
/**
 * Replace the drop zone's contents with a "file selected" row: a note icon,
 * the file name, and a button that clears the selection.
 *
 * The row is built from DOM nodes and the name is written with
 * `textContent`, because the name comes from a user-supplied file and must
 * never be parsed as HTML.
 *
 * @param {string} filename - Display name of the selected voice reference.
 */
function renderUploadedUI(filename) {
  const info = document.createElement("div");
  info.className = "uploaded-file-info";

  const details = document.createElement("div");
  details.className = "uploaded-file-details";

  const icon = document.createElement("span");
  icon.textContent = "🎵";

  const label = document.createElement("span");
  label.style.wordBreak = "break-all";
  label.style.textAlign = "left";
  label.textContent = filename;

  details.append(icon, label);

  const clearBtn = document.createElement("button");
  clearBtn.type = "button";
  clearBtn.className = "clear-upload";
  clearBtn.id = "btn-clear-upload";
  clearBtn.title = "Remove voice file";
  clearBtn.textContent = "✕";
  clearBtn.addEventListener("click", (e) => {
    // The button sits inside the drop zone; without this the click would
    // bubble up and reopen the file picker.
    e.stopPropagation();
    clearUploadedFile();
  });

  info.append(details, clearBtn);
  dropzone.replaceChildren(info);
}
1371
+
1372
/**
 * Forget the current voice reference and restore the drop zone's
 * "click to upload" prompt.
 */
function clearUploadedFile() {
  // Resetting the input's value lets the same file be picked again later.
  audioFileInput.value = "";
  selectedAudioFilename = "";
  selectedAudioFile = null;

  const emptyPrompt = `
    <span class="upload-icon">📤</span>
    <div class="upload-text">
      <strong>Click to upload</strong> or drag & drop<br>
      <span>Supports MP3, WAV, M4A, etc.</span>
    </div>
  `;
  dropzone.innerHTML = emptyPrompt;
}
1384
+
1385
+ // Fetch Example voice and load it into file variable
1386
// Monotonic id of the most recent loadExampleVoice() call. Results of older,
// slower fetches are discarded so they cannot overwrite a newer selection.
let latestExampleVoiceRequest = 0;

/**
 * Fetch an example voice clip and install it as the current voice
 * reference, as if the user had uploaded it.
 *
 * @param {string} voicePath - URL of the example audio file.
 * @param {string} originalFilename - Name given to the resulting File and
 *   shown in the drop zone.
 * @returns {Promise<void>} Resolves once the clip is installed, the request
 *   was superseded by a newer one, or the failure has been reported.
 */
async function loadExampleVoice(voicePath, originalFilename) {
  const requestId = ++latestExampleVoiceRequest;
  const isStale = () => requestId !== latestExampleVoiceRequest;

  try {
    updateStatus("Cloning example voice reference...", "info");
    const response = await fetch(voicePath);
    if (!response.ok) throw new Error("Failed to fetch audio resource.");

    const blob = await response.blob();
    // Another example was clicked while this one was downloading.
    if (isStale()) return;

    selectedAudioFile = new File([blob], originalFilename, { type: blob.type || "audio/mpeg" });
    selectedAudioFilename = originalFilename;

    renderUploadedUI(originalFilename);
    updateStatus("Voice reference loaded successfully", "success", false);
    setTimeout(hideStatus, 2000);
  } catch (err) {
    console.error(err);
    // Do not report a failure for a request the user has already replaced.
    if (isStale()) return;
    updateStatus("Could not fetch the example voice reference.", "error", false);
  }
}
1404
+
1405
+ // Populate Examples in dynamic Grid
1406
// Build one clickable card per entry in EXAMPLES and append it to the list.
const examplesContainer = document.getElementById("examples-container");

for (const ex of EXAMPLES) {
  const card = document.createElement("div");
  card.className = "example-item";

  // Entries whose name starts with "30s" get an extra badge.
  const genderClass = ex.gender === "male" ? "badge-male" : "badge-female";
  const longBadge = ex.name.startsWith("30s") ? '<span class="badge badge-long">30s scenes</span>' : '';
  const badgeHtml = `
    <span class="badge ${genderClass}">${ex.gender}</span>
    ${longBadge}
  `;

  card.innerHTML = `
    <div class="example-header">
      <span class="example-name">${ex.name}</span>
      <div class="example-badges">${badgeHtml}</div>
    </div>
    <div class="example-preview">${ex.prompt}</div>
  `;

  card.addEventListener("click", () => {
    // Move the highlight to the clicked card.
    for (const other of document.querySelectorAll(".example-item")) {
      other.classList.remove("active");
    }
    card.classList.add("active");

    // Copy the example's prompt and target duration into the form.
    scenePrompt.value = ex.prompt;
    sliderGenDur.value = ex.duration;
    valGenDur.innerText = ex.duration.toFixed(1);

    // The last path segment doubles as the displayed file name.
    const voiceFilename = ex.voice.slice(ex.voice.lastIndexOf('/') + 1);
    loadExampleVoice(ex.voice, voiceFilename);
  });

  examplesContainer.appendChild(card);
}
1442
+
1443
+ // HTML5 Audio Custom Player controls
1444
// True once a generated clip has been assigned to the hidden <audio> element.
let isAudioLoaded = false;

/**
 * Load a generated clip into the hidden <audio> element and reveal the
 * custom player in its initial (paused, rewound) state.
 *
 * @param {string} src - URL of the generated audio; also used as the
 *   download link target.
 */
function setupAudioElement(src) {
  // Loading a new resource resets playbackRate to defaultPlaybackRate, which
  // would silently drop the speed the user picked while its button stays
  // highlighted. Remember it and re-apply it after load().
  const chosenRate = audioElement.playbackRate;

  audioElement.src = src;
  audioElement.load();
  audioElement.playbackRate = chosenRate;
  isAudioLoaded = true;

  outputEmptyState.style.display = "none";
  customPlayer.style.display = "flex";

  // Set Player Initial state
  playerPlay.innerText = "▶";
  customPlayer.classList.remove("playing");
  playerProgress.value = 0;
  // Clear labels left over from a previous clip; the real duration is
  // filled in when the element reports its metadata.
  playerCurrentTime.innerText = "00:00";
  playerDuration.innerText = "00:00";

  // Download button URL update
  playerDownload.href = src;
}
1462
+
1463
// Show the clip length once the browser knows it.
audioElement.addEventListener("loadedmetadata", () => {
  playerDuration.innerText = formatTime(audioElement.duration);
});

// Keep the seek bar (0-100) and elapsed-time label in step with playback.
audioElement.addEventListener("timeupdate", () => {
  const { currentTime, duration } = audioElement;
  if (!duration) return;

  playerProgress.value = (currentTime / duration) * 100;
  playerCurrentTime.innerText = formatTime(currentTime);
});

// At the end of the clip, return the player to its idle, rewound look.
audioElement.addEventListener("ended", () => {
  customPlayer.classList.remove("playing");
  playerPlay.innerText = "▶";
  playerCurrentTime.innerText = "00:00";
  playerProgress.value = 0;
});
1481
+
1482
// Play/pause toggle for the custom player.
playerPlay.addEventListener("click", async () => {
  if (!isAudioLoaded) return;

  const showPaused = () => {
    playerPlay.innerText = "▶";
    customPlayer.classList.remove("playing");
  };

  if (!audioElement.paused) {
    audioElement.pause();
    showPaused();
    return;
  }

  // Switch to the "playing" look immediately for responsiveness.
  playerPlay.innerText = "⏸";
  customPlayer.classList.add("playing");
  try {
    // play() returns a promise that rejects when playback cannot start
    // (unsupported source, blocked by the browser, interrupted by pause()).
    await audioElement.play();
  } catch (err) {
    console.error(err);
    // Playback did not start: do not leave the UI claiming it did.
    if (audioElement.paused) showPaused();
  }
});
1495
+
1496
// Seek bar: its 0-100 value is a percentage of the clip's duration.
playerProgress.addEventListener("input", ({ target }) => {
  const total = audioElement.duration;
  if (!(isAudioLoaded && total)) return;

  audioElement.currentTime = (target.value / 100) * total;
});
1501
+
1502
// Volume slider (0-1). It is applied once up front as well, so the audio
// element starts at the slider's initial position instead of the browser
// default of full volume.
const applyPlayerVolume = () => {
  audioElement.volume = Number(playerVolume.value);
};
playerVolume.addEventListener("input", applyPlayerVolume);
applyPlayerVolume();
1505
+
1506
// Speed buttons: exactly one is highlighted, and its data-speed attribute
// becomes the element's playback rate.
for (const button of speedButtons) {
  button.addEventListener("click", () => {
    for (const other of speedButtons) {
      other.classList.remove("active");
    }
    button.classList.add("active");
    audioElement.playbackRate = parseFloat(button.dataset.speed);
  });
}
1513
+
1514
/**
 * Format a duration in seconds as zero-padded `MM:SS`.
 *
 * Media elements report `NaN` before metadata is available and `Infinity`
 * for unbounded streams; those, and negative values, are shown as "00:00"
 * rather than leaking "NaN:NaN" into the UI.
 *
 * @param {number} secs - Duration or position in seconds.
 * @returns {string} The formatted time; minutes are not capped at 59.
 */
function formatTime(secs) {
  if (!Number.isFinite(secs) || secs < 0) return "00:00";

  const whole = Math.floor(secs);
  const minutes = Math.floor(whole / 60);
  const seconds = whole % 60;
  const pad = (value) => String(value).padStart(2, "0");
  return `${pad(minutes)}:${pad(seconds)}`;
}
1521
+
1522
+ // Action Trigger: Generator Click
1523
// Generate button: validate the form, call the server's /generate_audio
// endpoint through the Gradio client, and load the result into the player.
btnGenerate.addEventListener("click", async () => {
  const promptVal = scenePrompt.value.trim();
  if (!promptVal) {
    updateStatus("Please write a scene prompt first.", "error", false);
    return;
  }

  if (!client) {
    updateStatus("DramaBox engine is still connecting, please wait.", "error", false);
    return;
  }

  // Gather values
  const cfg = parseFloat(sliderCfg.value);
  const stg = parseFloat(sliderStg.value);
  const durMult = parseFloat(sliderDur.value);
  const genDur = parseFloat(sliderGenDur.value);
  const refDur = parseFloat(sliderRefDur.value);

  // An empty or non-numeric seed parses to NaN, which is serialized as null
  // and fails on the server; reject it here with a readable message.
  const seed = Number.parseInt(inputSeed.value, 10);
  if (Number.isNaN(seed)) {
    updateStatus("Seed must be a whole number.", "error", false);
    return;
  }

  try {
    // UI Disable State
    btnGenerate.disabled = true;
    btnGenerate.innerHTML = `<div class="spinner"></div> Generating Voice...`;
    updateStatus("Preparing models & queue slot...", "info");

    // The voice reference is optional; null means "no reference".
    const uploadedFileData = selectedAudioFile ? handle_file(selectedAudioFile) : null;

    // Call Gradio queued api
    const predictResponse = await client.predict("/generate_audio", {
      prompt: promptVal,
      audio_ref: uploadedFileData,
      cfg: cfg,
      stg: stg,
      dur_mult: durMult,
      gen_dur: genDur,
      ref_dur: refDur,
      seed: seed
    });

    // Any missing link in the response chain is treated as "no output", so
    // the user never sees a raw TypeError message.
    const audioUrl = predictResponse?.data?.[0]?.url;
    if (!audioUrl) {
      throw new Error("No output returned from the generation server.");
    }

    setupAudioElement(audioUrl);
    updateStatus("Speech generation complete!", "success", false);
    setTimeout(hideStatus, 3000);
  } catch (err) {
    console.error(err);
    updateStatus(err.message || "Speech generation failed. Please try again.", "error", false);
  } finally {
    // Always restore the button, whether generation succeeded or failed.
    btnGenerate.disabled = false;
    btnGenerate.innerHTML = `<span>⚡</span> Generate Speech`;
  }
});
1583
+
1584
+ // Auto Connect on Load
1585
+ connectClient();
1586
+ </script>
1587
+ </body>
1588
+ </html>