File size: 29,484 Bytes
7c7db6a
 
 
 
 
 
 
 
 
 
 
 
 
 
24b2854
7c7db6a
 
 
c287b6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adb7693
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
 
 
 
 
 
 
 
 
7388985
adb7693
 
 
 
 
 
 
 
 
7388985
 
 
 
 
 
adb7693
 
 
 
 
 
 
7388985
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
adb7693
 
 
 
 
 
7c7db6a
 
 
a1d8cb5
adb7693
a1d8cb5
 
 
7c7db6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a1d8cb5
adb7693
c287b6a
adb7693
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
 
 
0ef5d8d
adb7693
 
 
c287b6a
adb7693
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
 
 
0ef5d8d
adb7693
 
a1d8cb5
adb7693
a1d8cb5
adb7693
a1d8cb5
 
adb7693
 
a1d8cb5
 
adb7693
 
 
 
 
 
a1d8cb5
adb7693
a1d8cb5
adb7693
 
 
 
 
 
 
 
 
a1d8cb5
adb7693
 
 
 
a1d8cb5
adb7693
 
c287b6a
a1d8cb5
7c7db6a
adb7693
7c7db6a
 
 
 
adb7693
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7c7db6a
adb7693
 
 
 
 
 
 
0ef5d8d
adb7693
 
 
 
0ef5d8d
adb7693
 
 
 
 
0ef5d8d
7c7db6a
adb7693
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
0ef5d8d
adb7693
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c287b6a
adb7693
 
 
 
2d0e666
7c7db6a
adb7693
 
 
7c7db6a
fdfc10d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ef5d8d
fdfc10d
 
 
 
 
0ef5d8d
fdfc10d
 
 
 
 
0ef5d8d
fdfc10d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ef5d8d
fdfc10d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0ef5d8d
fdfc10d
 
 
 
 
 
 
0ef5d8d
fdfc10d
 
 
 
 
 
 
0ef5d8d
fdfc10d
 
 
 
 
 
 
0ef5d8d
fdfc10d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
"""Per-tab Gradio component builders + shared output panel.

Each builder returns a dict of components keyed by purpose so app.py wires
events without depending on Gradio's positional return order.

NOTE: builders DO NOT instantiate the surrounding gr.Group / pane — they
ONLY build the form + output components inside it. app.py wraps the
result in pane_generate / pane_cover / etc.
"""

from __future__ import annotations

import gradio as gr

import lora_stack
import tooltips


def _build_advanced_accordion(components: dict[str, gr.components.Component]) -> None:
    """Add the collapsed "Advanced" accordion to the current layout.

    Mutates ``components`` in place: every control created here is stored
    under an ``adv_*`` key. Nothing is returned.

    Why it exists — user complaint: "no matter what prompt I write, style is
    not deviating by a lot". Root cause: ``GenerationParams.inference_steps``
    defaults to 8 (ACE-Step turbo), too few for the XL SFT model to express
    prompt variation, and ``guidance_scale``, ``infer_method``, ``shift``,
    ``use_adg`` and the CoT flags were all left at dataclass defaults as
    well. The accordion surfaces the 21 most useful knobs in four groups
    (diffusion, CFG schedule, 5Hz LM, music metadata) so the user can
    lock-and-iterate.

    Every song-mode pane (Generate / Cover / Extend / Edit) calls this right
    after ``_build_lora_accordion(components)`` to keep the layout
    consistent. The Lyrics tab does not: it is a Qwen path that already has
    its own LM-params accordion.
    """

    def _section(markdown_title: str) -> None:
        # Bold group heading; the class is the CSS hook for section styling.
        gr.Markdown(markdown_title, elem_classes=["ams-adv-section"])

    with gr.Accordion(label="Advanced", open=False, elem_classes=["ams-advanced"]):
        # ---- Group A: diffusion (most impactful) ----
        _section("**Diffusion**")
        components["adv_inference_steps"] = gr.Slider(
            label="Inference steps",
            info="More steps → richer detail. 8 is turbo, 27-60 is the sweet spot for XL SFT.",
            minimum=8, maximum=80, step=1, value=27,
        )
        components["adv_guidance_scale"] = gr.Slider(
            label="Guidance scale (CFG)",
            info="Higher = follow the prompt more strictly. Lower = more creative / weirder.",
            minimum=1.0, maximum=15.0, step=0.5, value=7.0,
        )
        components["adv_infer_method"] = gr.Radio(
            label="Inference method",
            info="ode = deterministic per seed. sde = injects stochastic noise per step → genuinely different outputs each run.",
            choices=["ode", "sde"],
            value="ode",
        )
        components["adv_seed"] = gr.Number(
            label="Seed",
            info="-1 = randomize each run. Set a number to lock-and-iterate.",
            value=-1,
            precision=0,
        )

        # ---- Group B: CFG schedule + shift + ADG ----
        _section("**CFG schedule + shift**")
        components["adv_cfg_interval_start"] = gr.Slider(
            label="CFG interval start",
            info="Fraction of diffusion at which CFG kicks in.",
            minimum=0.0, maximum=1.0, step=0.05, value=0.0,
        )
        components["adv_cfg_interval_end"] = gr.Slider(
            label="CFG interval end",
            info="Fraction of diffusion at which CFG stops.",
            minimum=0.0, maximum=1.0, step=0.05, value=1.0,
        )
        components["adv_shift"] = gr.Slider(
            label="Shift",
            info="Timestep shift. Try 0.7-1.3 for different feel.",
            minimum=0.5, maximum=3.0, step=0.1, value=1.0,
        )
        components["adv_use_adg"] = gr.Checkbox(
            label="Use Adaptive Dual Guidance (ADG)",
            info="Experimental — sometimes improves base model output.",
            value=False,
        )

        # ---- Group C: 5Hz language model (CoT reasoning) ----
        _section("**5Hz LM (CoT)**")
        components["adv_thinking"] = gr.Checkbox(
            label="Enable thinking (CoT)",
            info="Let the 5Hz LM reason before generating. Recommended ON.",
            value=True,
        )
        components["adv_use_cot_caption"] = gr.Checkbox(
            label="Let LM rewrite caption",
            info="LM expands/rephrases your prompt. Adds variety.",
            value=True,
        )
        components["adv_use_cot_metas"] = gr.Checkbox(
            label="Let LM infer metadata (bpm/key/time)",
            info="LM picks musical metadata. Turn off to force your manual values below.",
            value=True,
        )
        components["adv_use_cot_language"] = gr.Checkbox(
            label="Let LM detect vocal language",
            info="LM picks vocal language from caption + lyrics.",
            value=True,
        )
        components["adv_lm_temperature"] = gr.Slider(
            label="LM temperature",
            info="Higher = more creative metadata/structure.",
            minimum=0.0, maximum=2.0, step=0.05, value=0.85,
        )
        components["adv_lm_top_p"] = gr.Slider(
            label="LM top-p",
            info="Nucleus sampling.",
            minimum=0.0, maximum=1.0, step=0.05, value=0.9,
        )
        components["adv_lm_top_k"] = gr.Number(
            label="LM top-k",
            info="0 = disabled.",
            value=0,
            precision=0,
        )
        components["adv_lm_cfg_scale"] = gr.Slider(
            label="LM CFG scale",
            info="5Hz LM classifier-free guidance.",
            minimum=1.0, maximum=10.0, step=0.5, value=2.0,
        )
        components["adv_lm_negative_prompt"] = gr.Textbox(
            label="LM negative prompt",
            info="Steer the LM AWAY from these traits.",
            value="NO USER INPUT",
        )

        # ---- Group D: music metadata (manual overrides) ----
        _section("**Music metadata**")
        components["adv_bpm"] = gr.Number(
            label="BPM",
            info="Empty = auto. 30-300.",
            value=None,  # starts empty, which the UI text describes as "auto"
            precision=0,
        )
        components["adv_keyscale"] = gr.Textbox(
            label="Key / scale",
            info="e.g. 'C Major', 'Am'. Empty = auto.",
            value="",
        )
        components["adv_timesignature"] = gr.Dropdown(
            label="Time signature",
            info="2=2/4, 3=3/4, 4=4/4, 6=6/8. Empty = auto.",
            choices=["", "2", "3", "4", "6"],
            value="",
        )
        components["adv_vocal_language"] = gr.Dropdown(
            label="Vocal language",
            info="Hint for the 5Hz LM. unknown = auto.",
            choices=["unknown", "en", "zh", "ja", "ko", "es", "fr", "de", "it", "pt", "ru"],
            value="unknown",
        )


def _build_lora_accordion(components: dict[str, gr.components.Component]) -> None:
    """Add the collapsed single-slot "LoRA" accordion; mutates ``components``.

    Called by each song mode (generate / cover / extend / edit) so every
    form carries the same LoRA picker. The Apple-Silicon ACE-Step fork's
    AceStepHandler can hold only one active adapter at a time (see
    ``lora_stack.apply_stack``), hence a single slot: a preset radio OR a
    custom upload, a strength slider, and a Markdown "active LoRA" readout.

    Keys written: ``lora_preset``, ``lora_upload``, ``lora_strength``,
    ``lora_active`` and ``lora_state``.
    """
    accordion = gr.Accordion(
        label="LoRA",
        open=False,
        elem_classes=["ams-lora", "ams-lora-accordion"],
    )
    with accordion:
        gr.Markdown(
            "_Only one LoRA at a time on this build. Picking a preset or "
            "uploading a custom file replaces the active LoRA._",
            elem_classes=["ams-lora-note"],
        )

        # Choices come from presets/manifest.json so the radio stays in sync
        # with whichever official ACE-Step LoRAs are actually published on
        # HuggingFace. "None" is always offered first and is the default.
        preset_choices = ["None"]
        preset_choices.extend(preset["name"] for preset in lora_stack.load_presets())

        components["lora_preset"] = gr.Radio(
            label="Preset",
            choices=preset_choices,
            value="None",
            interactive=True,
            elem_classes=["ams-lora-preset"],
        )
        components["lora_upload"] = gr.File(
            label="Custom LoRA (.safetensors)",
            file_count="single",
            file_types=[".safetensors"],
            elem_classes=["ams-lora-file"],
        )
        components["lora_strength"] = gr.Slider(
            label="Strength",
            info=tooltips.LORA_STRENGTH,
            minimum=0.0,
            maximum=1.5,
            value=0.95,
            step=0.05,
            elem_classes=["ams-lora-strength"],
        )
        components["lora_active"] = gr.Markdown(
            "_No LoRA active_",
            elem_classes=["ams-lora-active"],
        )

        # Hidden state carrying the resolved active LoRA dict
        # ``{name, scale, path, sha256}`` so the click handler can hand it
        # straight to backend.dispatch.
        components["lora_state"] = gr.State(None)


def _build_output_panel(components: dict[str, gr.components.Component]) -> None:
    """Shared OUTPUT (gr.Audio) + post-process actions + METADATA (gr.JSON).

    Mutates ``components`` in place and returns nothing. Keys written:
    ``output_audio``, ``separate_stems_btn``, ``normalise_btn``, ``mp3_btn``,
    ``stem_files``, ``normalised_audio``, ``mp3_file`` and ``output_meta``.

    elem_classes on each output component give CSS hooks for the
    Brutalist Mono treatment (uppercase mono labels + bordered
    empty-state panels). Without these we'd need to target
    svelte-hashed classes which can change across Gradio versions.

    gr.JSON renders a dict directly as a syntax-highlighted, expandable
    tree. gr.Code(language="json") refuses dicts — it requires a
    pre-stringified blob — and crashes with "'dict' has no .strip()".

    Below the Audio we expose three secondary post-process actions
    (M5/G2): Demucs stem separation, pyloudnorm LUFS normalisation, and
    ffmpeg MP3 export. Each emits to an output that is created hidden
    (stem_files / normalised_audio / mp3_file) and is meant to become
    visible once the click handler returns a populated value; that wiring
    lives outside this builder.
    """
    components["output_audio"] = gr.Audio(
        label="Output",
        type="filepath",
        interactive=False,
        elem_classes=["ams-out", "ams-out-audio"],
    )
    with gr.Row(elem_classes=["ams-post-actions"]):
        components["separate_stems_btn"] = gr.Button(
            "↯ Separate stems",
            variant="secondary",
            elem_classes=["ams-post-btn"],
        )
        components["normalise_btn"] = gr.Button(
            # U+25AE (black vertical rectangle). The label was previously
            # stored as mojibake from a mis-decoded UTF-8 sequence.
            "▮ Normalise -14 LUFS",
            variant="secondary",
            elem_classes=["ams-post-btn"],
        )
        components["mp3_btn"] = gr.Button(
            "↓ MP3 320k",
            variant="secondary",
            elem_classes=["ams-post-btn"],
        )
    # Post-process outputs start hidden so the empty panel stays compact.
    components["stem_files"] = gr.Files(
        label="Stems",
        visible=False,
        elem_classes=["ams-stem-files"],
    )
    components["normalised_audio"] = gr.Audio(
        label="Normalised (-14 LUFS)",
        type="filepath",
        interactive=False,
        visible=False,
        elem_classes=["ams-out", "ams-out-normalised"],
    )
    components["mp3_file"] = gr.File(
        label="MP3 download",
        visible=False,
        elem_classes=["ams-mp3-file"],
    )
    components["output_meta"] = gr.JSON(
        label="Metadata",
        elem_classes=["ams-out", "ams-out-meta"],
    )


def build_generate_tab() -> dict[str, gr.components.Component]:
    """Generate tab body: 2-column row (form left, output right).

    The form holds the style prompt, lyrics, a duration / vocal-mode row,
    then the collapsed single-LoRA accordion and the collapsed Advanced
    accordion, followed by the Generate button. The right column holds the
    shared output panel.

    Returns:
        Dict of components keyed by purpose: ``prompt``, ``lyrics``,
        ``duration_s``, ``instrumental``, ``generate_btn``, plus every key
        added by ``_build_lora_accordion``, ``_build_advanced_accordion``
        and ``_build_output_panel``.
    """
    components: dict[str, gr.components.Component] = {}

    with gr.Row():
        # --- FORM column (left; scale 13 of 23) ---
        with gr.Column(scale=13):
            components["prompt"] = gr.Textbox(
                label="Style prompt",
                placeholder="psytrance, rolling triplet bassline, acid squelch, metallic leads",
                lines=2,
                info=tooltips.GENERATE_PROMPT,
            )
            components["lyrics"] = gr.Textbox(
                label="Lyrics",
                placeholder="[intro] atmospheric pads\n[verse] ...",
                lines=6,
                info=tooltips.GENERATE_LYRICS,
            )
            with gr.Row():
                components["duration_s"] = gr.Slider(
                    minimum=5,
                    maximum=240,
                    step=5,
                    value=30,
                    label="Duration (s)",
                    info=tooltips.GENERATE_DURATION,
                )
                components["instrumental"] = gr.Radio(
                    choices=["With vocals", "Instrumental"],
                    value="With vocals",
                    label="Vocal mode",
                    info=tooltips.GENERATE_VOCAL,
                )

            # Same accordion order as the other song-mode tabs.
            _build_lora_accordion(components)
            _build_advanced_accordion(components)

            components["generate_btn"] = gr.Button(
                # U+25B6 (play triangle). The label was previously stored as
                # mojibake from a mis-decoded UTF-8 sequence.
                "▶ Generate",
                variant="primary",
            )

        # --- OUTPUT column (right; scale 10 of 23) ---
        with gr.Column(scale=10):
            _build_output_panel(components)

    return components


def build_cover_tab() -> dict[str, gr.components.Component]:
    """Cover tab body: reference audio + new lyrics -> cover in that style.

    Maps to ACE-Step's ``task_type="cover"`` with the uploaded reference
    feeding ``reference_audio`` and the strength slider controlling
    ``audio_cover_strength``. Higher strength clings to the reference;
    lower lets the new prompt/lyrics drift the timbre.

    Returns:
        Dict of components keyed by purpose: ``ref_audio``, ``prompt``,
        ``lyrics``, ``duration_s``, ``audio_cover_strength``,
        ``generate_btn``, plus every key added by ``_build_lora_accordion``,
        ``_build_advanced_accordion`` and ``_build_output_panel``.
    """
    components: dict[str, gr.components.Component] = {}
    with gr.Row():
        # --- FORM column (left) ---
        with gr.Column(scale=13):
            components["ref_audio"] = gr.Audio(
                label="Reference audio",
                type="filepath",
                sources=["upload"],
                elem_classes=["ams-input-audio"],
            )
            components["prompt"] = gr.Textbox(
                label="New style prompt (optional)",
                placeholder="faster, more aggressive leads",
                lines=2,
                info=tooltips.COVER_PROMPT,
            )
            components["lyrics"] = gr.Textbox(
                label="New lyrics",
                placeholder="[verse] new lyrics over the reference style",
                lines=5,
                info=tooltips.COVER_LYRICS,
            )
            with gr.Row():
                components["duration_s"] = gr.Slider(
                    minimum=5,
                    maximum=240,
                    step=5,
                    value=30,
                    label="Duration (s)",
                    info=tooltips.COVER_DURATION,
                )
                components["audio_cover_strength"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=0.93,
                    label="Cover strength",
                    info=tooltips.COVER_STRENGTH,
                )

            # Same accordion order as the other song-mode tabs.
            _build_lora_accordion(components)
            _build_advanced_accordion(components)

            components["generate_btn"] = gr.Button(
                # U+25B6 (play triangle). The label was previously stored as
                # mojibake from a mis-decoded UTF-8 sequence.
                "▶ Generate cover",
                variant="primary",
            )

        # --- OUTPUT column (right) ---
        with gr.Column(scale=10):
            _build_output_panel(components)

    return components


def build_extend_tab() -> dict[str, gr.components.Component]:
    """Extend tab body: seed audio + extension prompt -> continued song.

    Maps to ACE-Step's ``task_type="repaint"`` with ``src_audio`` set to
    the uploaded seed and the repaint window pointing past the end of
    the seed so the model paints new audio after it.

    ``wav_crossfade_s`` sits in the main form next to the extra-duration
    slider. The remaining repaint params (``repaint_mode``,
    ``repaint_strength``, ``latent_crossfade_frames``, ``chunk_mask_mode``)
    are surfaced in a collapsed experimental accordion because the installed
    ACE-Step ``GenerationParams`` dataclass doesn't expose them yet — the
    UI captures them so they're ready to plumb through once upstream
    adds the fields.

    Returns:
        Dict of components keyed by purpose: ``seed_audio``,
        ``extra_prompt``, ``extension_lyrics``, ``extra_duration_s``,
        ``wav_crossfade_s``, ``repaint_mode``, ``repaint_strength``,
        ``latent_crossfade_frames``, ``chunk_mask_mode``, ``generate_btn``,
        plus every key added by ``_build_lora_accordion``,
        ``_build_advanced_accordion`` and ``_build_output_panel``.
    """
    components: dict[str, gr.components.Component] = {}
    with gr.Row():
        # --- FORM column (left) ---
        with gr.Column(scale=13):
            components["seed_audio"] = gr.Audio(
                label="Seed audio",
                type="filepath",
                sources=["upload"],
                elem_classes=["ams-input-audio"],
            )
            components["extra_prompt"] = gr.Textbox(
                label="Extension prompt",
                placeholder="build to climax, layered acid leads",
                lines=2,
                info=tooltips.EXTEND_PROMPT,
            )
            components["extension_lyrics"] = gr.Textbox(
                label="Extension lyrics (optional)",
                placeholder="[bridge] the drop is coming...",
                lines=4,
                info=tooltips.EXTEND_LYRICS,
            )
            with gr.Row():
                components["extra_duration_s"] = gr.Slider(
                    minimum=5,
                    maximum=120,
                    step=5,
                    value=60,
                    label="Extra duration (s)",
                    info=tooltips.EXTEND_DURATION,
                )
                components["wav_crossfade_s"] = gr.Slider(
                    minimum=0.0,
                    maximum=5.0,
                    step=0.1,
                    value=2.0,
                    label="WAV crossfade (s)",
                    info=tooltips.EXTEND_CROSSFADE,
                )

            with gr.Accordion(
                "Repaint params (experimental)",
                open=False,
                elem_classes=["ams-experimental"],
            ):
                gr.Markdown(
                    "_These knobs are captured in the request but the installed "
                    "ACE-Step dataclass doesn't expose them yet._",
                    elem_classes=["ams-lora-note"],
                )
                components["repaint_mode"] = gr.Dropdown(
                    choices=["balanced", "left", "right"],
                    value="balanced",
                    label="Repaint mode",
                )
                components["repaint_strength"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.5,
                    label="Repaint strength",
                )
                components["latent_crossfade_frames"] = gr.Slider(
                    minimum=0,
                    maximum=30,
                    step=1,
                    value=10,
                    label="Latent crossfade frames",
                )
                components["chunk_mask_mode"] = gr.Dropdown(
                    choices=["auto", "manual"],
                    value="auto",
                    label="Chunk mask",
                )

            # Same accordion order as the other song-mode tabs.
            _build_lora_accordion(components)
            _build_advanced_accordion(components)

            components["generate_btn"] = gr.Button(
                # U+25B6 (play triangle). The label was previously stored as
                # mojibake from a mis-decoded UTF-8 sequence.
                "▶ Extend",
                variant="primary",
            )

        # --- OUTPUT column (right) ---
        with gr.Column(scale=10):
            _build_output_panel(components)

    return components


def build_edit_tab() -> dict[str, gr.components.Component]:
    """Edit tab body: source audio + segment + target lyrics -> repaint/morph.

    Two sub-modes:

    - ``repaint`` (default): paint over [segment_start_s, segment_end_s]
      using ACE-Step's repaint task_type. ``segment_start_s`` and
      ``segment_end_s`` are wired through the params dict to
      ``repainting_start`` / ``repainting_end`` on the pipeline side.
    - ``flow_edit``: caption-to-caption morph. The installed ACE-Step
      ``GenerationParams`` has no ``flow_edit_*`` fields, so this
      sub-mode falls back to a repaint pass with lower
      ``audio_cover_strength``. The flow knobs are still captured so
      they're ready once upstream adds native support.

    Returns:
        Dict of components keyed by purpose: ``source_audio``, ``sub_mode``,
        ``source_lyrics``, ``target_lyrics``, ``segment_start_s``,
        ``segment_end_s``, ``repaint_strength``, ``repaint_mode``,
        ``flow_source_caption``, ``flow_n_min``, ``flow_n_max``,
        ``flow_n_avg``, ``generate_btn``, plus every key added by
        ``_build_lora_accordion``, ``_build_advanced_accordion`` and
        ``_build_output_panel``.
    """
    components: dict[str, gr.components.Component] = {}
    with gr.Row():
        # --- FORM column (left) ---
        with gr.Column(scale=13):
            components["source_audio"] = gr.Audio(
                label="Source audio",
                type="filepath",
                sources=["upload"],
                elem_classes=["ams-input-audio"],
            )
            components["sub_mode"] = gr.Radio(
                choices=["repaint", "flow_edit"],
                value="repaint",
                label="Edit sub-mode",
                info=tooltips.EDIT_SUB_MODE,
            )
            components["source_lyrics"] = gr.Textbox(
                label="Source lyrics",
                lines=3,
                info=tooltips.EDIT_SOURCE_LYRICS,
            )
            components["target_lyrics"] = gr.Textbox(
                label="Target lyrics",
                placeholder="[chorus] new chorus replaces the old",
                lines=3,
                info=tooltips.EDIT_TARGET_LYRICS,
            )
            with gr.Row():
                components["segment_start_s"] = gr.Number(
                    value=0.0,
                    label="Segment start (s)",
                    precision=1,
                    info=tooltips.EDIT_SEGMENT_START,
                )
                components["segment_end_s"] = gr.Number(
                    value=30.0,
                    label="Segment end (s)",
                    precision=1,
                    info=tooltips.EDIT_SEGMENT_END,
                )

            with gr.Accordion(
                "Repaint options (experimental)",
                open=False,
                elem_classes=["ams-experimental"],
            ):
                gr.Markdown(
                    "_These knobs are captured in the request but the installed "
                    "ACE-Step dataclass doesn't expose them yet._",
                    elem_classes=["ams-lora-note"],
                )
                components["repaint_strength"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.5,
                    label="Repaint strength",
                )
                components["repaint_mode"] = gr.Dropdown(
                    choices=["balanced", "left", "right"],
                    value="balanced",
                    label="Repaint mode",
                )

            with gr.Accordion(
                "Flow-morph options (experimental)",
                open=False,
                elem_classes=["ams-experimental"],
            ):
                gr.Markdown(
                    "_flow_edit sub-mode currently falls back to a repaint pass with "
                    "lower audio_cover_strength. flow-specific params are captured "
                    "but not yet wired._",
                    elem_classes=["ams-lora-note"],
                )
                components["flow_source_caption"] = gr.Textbox(
                    label="Source caption",
                    placeholder="acoustic ballad, gentle piano",
                )
                components["flow_n_min"] = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.0, step=0.05, label="n_min"
                )
                components["flow_n_max"] = gr.Slider(
                    minimum=0.0, maximum=1.0, value=1.0, step=0.05, label="n_max"
                )
                components["flow_n_avg"] = gr.Slider(minimum=1, maximum=5, value=1, step=1, label="n_avg")

            # Same accordion order as the other song-mode tabs.
            _build_lora_accordion(components)
            _build_advanced_accordion(components)

            components["generate_btn"] = gr.Button(
                # U+25B6 (play triangle). The label was previously stored as
                # mojibake from a mis-decoded UTF-8 sequence.
                "▶ Apply edit",
                variant="primary",
            )

        # --- OUTPUT column (right) ---
        with gr.Column(scale=10):
            _build_output_panel(components)

    return components


def build_lyrics_tab() -> dict[str, gr.components.Component]:
    """Lyrics tab body: Qwen 2.5 7B drafts structurally-tagged lyrics.

    Compact 2-column row: form on the left (brief / structure / language /
    line counts / tone / rhyme + collapsed LM-params accordion), output on
    the right (read-only multi-line textbox + ``Use these in Generate``
    cross-tab CTA + bordered JSON metadata panel).

    The output textbox carries ``elem_classes=["ams-lyrics-output"]`` so
    the Brutalist Mono treatment in ``theme.CSS`` (mono font, 12 px,
    280 px min-height) applies. The "Use in Generate" button is tagged
    ``ams-lyrics-use-btn`` so it gets a small top margin instead of
    sitting flush against the textbox.

    Does NOT include the LoRA accordion — Qwen-7B has no LoRA picker and
    the audio-mode LoRA semantics don't apply here.

    Returns:
        Dict of components keyed by purpose: ``brief``, ``structure``,
        ``language``, ``verse_lines``, ``chorus_lines``, ``bridge_lines``,
        ``tone``, ``rhyme``, ``temperature``, ``top_p``, ``top_k``,
        ``max_new_tokens``, ``seed``, ``draft_btn``, ``lyrics_output``,
        ``use_in_generate_btn`` and ``meta_output``.
    """
    c: dict[str, gr.components.Component] = {}
    with gr.Row():
        # --- FORM column (left) ---
        with gr.Column(scale=12):
            c["brief"] = gr.Textbox(
                label="Brief",
                lines=4,
                placeholder=("Describe the song. Tone, mood, references, specific images, lines to avoid…"),
                info=tooltips.LYRICS_BRIEF,
            )
            with gr.Row():
                c["structure"] = gr.Textbox(
                    label="Structure",
                    value="intro, verse, chorus, verse, chorus, bridge, chorus, outro",
                    info=tooltips.LYRICS_STRUCTURE,
                )
                c["language"] = gr.Dropdown(
                    choices=["en", "zh", "ja", "ko", "es", "fr", "de"],
                    value="en",
                    label="Language",
                    info=tooltips.LYRICS_LANGUAGE,
                )
            with gr.Row():
                c["verse_lines"] = gr.Slider(
                    minimum=2,
                    maximum=10,
                    value=6,
                    step=1,
                    label="Verse lines",
                )
                c["chorus_lines"] = gr.Slider(
                    minimum=2,
                    maximum=8,
                    value=4,
                    step=1,
                    label="Chorus lines",
                )
                c["bridge_lines"] = gr.Slider(
                    minimum=1,
                    maximum=6,
                    value=2,
                    step=1,
                    label="Bridge lines",
                )
            c["tone"] = gr.Textbox(
                label="Tone / mood",
                placeholder="euphoric, hypnotic, transcendent, not cheesy",
                info=tooltips.LYRICS_TONE,
            )
            c["rhyme"] = gr.Radio(
                choices=["strict", "loose", "none"],
                value="loose",
                label="Rhyme",
            )
            with gr.Accordion(
                "LM parameters",
                open=False,
                elem_classes=["ams-lm-accordion"],
            ):
                c["temperature"] = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.85,
                    step=0.05,
                    label="Temperature",
                    info=tooltips.LYRICS_TEMPERATURE,
                )
                c["top_p"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top-p",
                    info=tooltips.LYRICS_TOP_P,
                )
                c["top_k"] = gr.Slider(
                    minimum=0,
                    maximum=200,
                    value=40,
                    step=1,
                    label="Top-k",
                    info=tooltips.LYRICS_TOP_K,
                )
                c["max_new_tokens"] = gr.Slider(
                    minimum=100,
                    maximum=2000,
                    value=600,
                    step=50,
                    label="Max new tokens",
                    info=tooltips.LYRICS_MAX_TOKENS,
                )
                c["seed"] = gr.Number(
                    value=42,
                    precision=0,
                    label="Seed",
                )
            c["draft_btn"] = gr.Button(
                # U+25B6 (play triangle). The label was previously stored as
                # mojibake from a mis-decoded UTF-8 sequence.
                "▶ Draft lyrics",
                variant="primary",
            )

        # --- OUTPUT column (right) ---
        with gr.Column(scale=10):
            # NOTE: gr.Textbox in Gradio 6.14 doesn't accept ``show_copy_button``
            # (the kwarg landed in a later 6.x), so no copy button is requested
            # here. Users can still select and copy the draft with the browser's
            # native Cmd-A / Cmd-C.
            c["lyrics_output"] = gr.Textbox(
                label="Draft",
                lines=14,
                interactive=False,
                elem_classes=["ams-lyrics-output"],
            )
            c["use_in_generate_btn"] = gr.Button(
                "↑ Use these in Generate",
                variant="primary",
                elem_classes=["ams-lyrics-use-btn"],
            )
            c["meta_output"] = gr.JSON(
                label="Metadata",
                elem_classes=["ams-out", "ams-out-meta"],
            )
    return c