techfreakworm committed on
Commit
adb7693
·
unverified ·
1 Parent(s): 26dc3a4

feat(ui): wire cover/extend/edit tabs into the sidebar layout (m3 e3+e4)

Browse files

Replaces the three placeholder Markdowns in pane_cover / pane_extend /
pane_edit with full form builders mirroring the Generate tab's
structure (2-column row, LoRA accordion, output panel).

Each tab carries mode-specific inputs:
- Cover: reference audio + audio_cover_strength slider
- Extend: seed audio + extra_duration + experimental repaint params
- Edit: source audio + sub_mode pills (repaint/flow_edit) +
segment_start_s/segment_end_s + experimental flow-morph params

Shared chrome (LoRA accordion + Output/Metadata panels) factored
into _build_lora_accordion / _build_output_panel helpers so future
mode additions don't duplicate the same 30-line block.

Fields not yet wired to GenerationParams (repaint_mode,
repaint_strength, chunk_mask_mode, latent_crossfade_frames,
wav_crossfade_s, flow_source_caption, flow_n_*, cover_noise_strength)
are surfaced under "(experimental)" accordions with inline notes
explaining they're captured but no-ops until ACE-Step adds the
fields upstream. The plumbing already passes them through
backend.dispatch -> ACEStepStudio.generate -> internal params dict.

Verified at 360 px (mobile) and 1440 px (desktop) via Playwright;
each new tab's form + output panel render without regression vs the
Generate tab's Brutalist Mono baseline.

Files changed (3) hide show
  1. app.py +210 -3
  2. theme.py +75 -0
  3. ui.py +340 -59
app.py CHANGED
@@ -191,6 +191,121 @@ def on_generate_click(
191
  return out_path, meta
192
 
193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
194
  HEADER_HTML = """
195
  <div class="ams-header">
196
  <div>
@@ -300,11 +415,103 @@ def build_app() -> gr.Blocks:
300
  outputs=[g["output_audio"], g["output_meta"]],
301
  )
302
  with gr.Group(visible=False, elem_classes=["ams-tab-pane"]) as pane_cover:
303
- gr.Markdown("### 🎤 Cover\n\nPlaceholder — implemented in M3.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  with gr.Group(visible=False, elem_classes=["ams-tab-pane"]) as pane_extend:
305
- gr.Markdown("### Extend\n\nPlaceholder — implemented in M3.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  with gr.Group(visible=False, elem_classes=["ams-tab-pane"]) as pane_edit:
307
- gr.Markdown("### ✏️ Edit\n\nPlaceholder — implemented in M3.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  with gr.Group(visible=False, elem_classes=["ams-tab-pane"]) as pane_lyrics:
309
  gr.Markdown("### ✍️ Lyrics\n\nPlaceholder — implemented in M4.")
310
 
 
191
  return out_path, meta
192
 
193
 
194
def on_cover_click(
    ref_audio,
    prompt: str,
    lyrics: str,
    duration_s: float,
    audio_cover_strength: float,
    lora_state,
    progress=gr.Progress(track_tqdm=True),  # noqa: B008
):
    """Handle a click on the Cover tab's generate button.

    Packs the form values into a params dict and forwards it, together
    with the backend returned by ``get_backend()``, to ``modes.cover``.
    Whatever ``modes.cover`` returns is passed back unchanged.

    ``ref_audio`` is the filepath produced by ``gr.Audio(type='filepath')``.
    ``lora_state`` is the single active-LoRA entry, or a falsy value when
    no LoRA is selected. ``progress`` is not referenced in the body.

    Raises:
        gr.Error: when a ``ValueError`` is raised while building or
            dispatching the request; the message is carried over.
    """
    # Single-slot LoRA: wrap the one entry in a list, or send none at all.
    if lora_state:
        active_loras = [lora_state]
    else:
        active_loras = []

    try:
        # Backend is resolved before the request is assembled, and both
        # steps stay inside the try so any ValueError surfaces as gr.Error.
        backend = get_backend()
        request = {
            "ref_audio": ref_audio,
            "prompt": prompt,
            "lyrics": lyrics,
            "duration_s": int(duration_s),
            "audio_cover_strength": float(audio_cover_strength),
            "seed": random.randint(1, 2_147_483_647),
            "loras": active_loras,
            "advanced": {},
            "lm": {},
            "dcw": {},
        }
        return modes.cover(backend, params=request)
    except ValueError as exc:
        raise gr.Error(str(exc)) from exc
223
+
224
+
225
def on_extend_click(
    seed_audio,
    extra_prompt: str,
    extension_lyrics: str,
    extra_duration_s: float,
    wav_crossfade_s: float,
    repaint_mode: str,
    repaint_strength: float,
    latent_crossfade_frames: float,
    chunk_mask_mode: str,
    lora_state,
    progress=gr.Progress(track_tqdm=True),  # noqa: B008
):
    """Handle a click on the Extend tab's generate button.

    Packs the form values into a params dict and forwards it, together
    with the backend returned by ``get_backend()``, to ``modes.extend``.
    Whatever ``modes.extend`` returns is passed back unchanged.

    ``seed_audio`` is the filepath produced by ``gr.Audio(type='filepath')``.
    ``lora_state`` is the single active-LoRA entry, or a falsy value when
    no LoRA is selected. ``progress`` is not referenced in the body.

    Raises:
        gr.Error: when a ``ValueError`` is raised while building or
            dispatching the request; the message is carried over.
    """
    # Single-slot LoRA: wrap the one entry in a list, or send none at all.
    if lora_state:
        active_loras = [lora_state]
    else:
        active_loras = []

    try:
        # Backend is resolved before the request is assembled, and both
        # steps stay inside the try so any ValueError surfaces as gr.Error.
        backend = get_backend()
        request = {
            "seed_audio": seed_audio,
            "extra_prompt": extra_prompt,
            "extension_lyrics": extension_lyrics,
            "extra_duration_s": int(extra_duration_s),
            "wav_crossfade_s": float(wav_crossfade_s),
            "repaint_mode": repaint_mode,
            "repaint_strength": float(repaint_strength),
            "latent_crossfade_frames": int(latent_crossfade_frames),
            "chunk_mask_mode": chunk_mask_mode,
            "seed": random.randint(1, 2_147_483_647),
            "loras": active_loras,
            "advanced": {},
            "lm": {},
            "dcw": {},
        }
        return modes.extend(backend, params=request)
    except ValueError as exc:
        raise gr.Error(str(exc)) from exc
262
+
263
+
264
def on_edit_click(
    source_audio,
    sub_mode: str,
    source_lyrics: str,
    target_lyrics: str,
    segment_start_s: float,
    segment_end_s: float,
    repaint_strength: float,
    repaint_mode: str,
    flow_source_caption: str,
    flow_n_min: float,
    flow_n_max: float,
    flow_n_avg: float,
    lora_state,
    progress=gr.Progress(track_tqdm=True),  # noqa: B008
):
    """Handle a click on the Edit tab's "Apply edit" button.

    Packs the form values into a params dict and forwards it, together
    with the backend returned by ``get_backend()``, to ``modes.edit``.
    Whatever ``modes.edit`` returns is passed back unchanged.

    ``source_audio`` is the filepath produced by
    ``gr.Audio(type='filepath')``. ``lora_state`` is the single
    active-LoRA entry, or a falsy value when no LoRA is selected.
    ``progress`` is not referenced in the body.

    Raises:
        gr.Error: when a segment bound is missing, or when a
            ``ValueError`` is raised while building or dispatching the
            request (the message is carried over).
    """
    # The segment bounds come from gr.Number fields, which hand over None
    # when the user clears them. float(None) would raise a TypeError that
    # the ValueError handler below does not translate, so reject it here
    # with a readable message instead.
    if segment_start_s is None or segment_end_s is None:
        raise gr.Error("Segment start and end are required.")

    # Single-slot LoRA: wrap the one entry in a list, or send none at all.
    loras = [lora_state] if lora_state else []
    try:
        return modes.edit(
            get_backend(),
            params={
                "source_audio": source_audio,
                "sub_mode": sub_mode,
                "source_lyrics": source_lyrics,
                "target_lyrics": target_lyrics,
                "segment_start_s": float(segment_start_s),
                "segment_end_s": float(segment_end_s),
                "repaint_strength": float(repaint_strength),
                "repaint_mode": repaint_mode,
                "flow_source_caption": flow_source_caption,
                "flow_n_min": float(flow_n_min),
                "flow_n_max": float(flow_n_max),
                "flow_n_avg": int(flow_n_avg),
                "seed": random.randint(1, 2_147_483_647),
                "loras": loras,
                "advanced": {},
                "lm": {},
                "dcw": {},
            },
        )
    except ValueError as e:
        raise gr.Error(str(e)) from e
307
+
308
+
309
  HEADER_HTML = """
310
  <div class="ams-header">
311
  <div>
 
415
  outputs=[g["output_audio"], g["output_meta"]],
416
  )
417
  with gr.Group(visible=False, elem_classes=["ams-tab-pane"]) as pane_cover:
418
+ c = ui.build_cover_tab()
419
+ c["lora_preset"].change(
420
+ fn=on_lora_preset_change,
421
+ inputs=[c["lora_preset"], c["lora_strength"]],
422
+ outputs=[c["lora_state"], c["lora_active"], c["lora_upload"]],
423
+ )
424
+ c["lora_upload"].change(
425
+ fn=on_lora_upload,
426
+ inputs=[c["lora_upload"], c["lora_strength"]],
427
+ outputs=[c["lora_state"], c["lora_active"], c["lora_preset"]],
428
+ )
429
+ c["lora_strength"].change(
430
+ fn=on_lora_strength_change,
431
+ inputs=[c["lora_state"], c["lora_strength"]],
432
+ outputs=[c["lora_state"], c["lora_active"]],
433
+ )
434
+ c["generate_btn"].click(
435
+ fn=on_cover_click,
436
+ inputs=[
437
+ c["ref_audio"],
438
+ c["prompt"],
439
+ c["lyrics"],
440
+ c["duration_s"],
441
+ c["audio_cover_strength"],
442
+ c["lora_state"],
443
+ ],
444
+ outputs=[c["output_audio"], c["output_meta"]],
445
+ )
446
  with gr.Group(visible=False, elem_classes=["ams-tab-pane"]) as pane_extend:
447
+ x = ui.build_extend_tab()
448
+ x["lora_preset"].change(
449
+ fn=on_lora_preset_change,
450
+ inputs=[x["lora_preset"], x["lora_strength"]],
451
+ outputs=[x["lora_state"], x["lora_active"], x["lora_upload"]],
452
+ )
453
+ x["lora_upload"].change(
454
+ fn=on_lora_upload,
455
+ inputs=[x["lora_upload"], x["lora_strength"]],
456
+ outputs=[x["lora_state"], x["lora_active"], x["lora_preset"]],
457
+ )
458
+ x["lora_strength"].change(
459
+ fn=on_lora_strength_change,
460
+ inputs=[x["lora_state"], x["lora_strength"]],
461
+ outputs=[x["lora_state"], x["lora_active"]],
462
+ )
463
+ x["generate_btn"].click(
464
+ fn=on_extend_click,
465
+ inputs=[
466
+ x["seed_audio"],
467
+ x["extra_prompt"],
468
+ x["extension_lyrics"],
469
+ x["extra_duration_s"],
470
+ x["wav_crossfade_s"],
471
+ x["repaint_mode"],
472
+ x["repaint_strength"],
473
+ x["latent_crossfade_frames"],
474
+ x["chunk_mask_mode"],
475
+ x["lora_state"],
476
+ ],
477
+ outputs=[x["output_audio"], x["output_meta"]],
478
+ )
479
  with gr.Group(visible=False, elem_classes=["ams-tab-pane"]) as pane_edit:
480
+ e = ui.build_edit_tab()
481
+ e["lora_preset"].change(
482
+ fn=on_lora_preset_change,
483
+ inputs=[e["lora_preset"], e["lora_strength"]],
484
+ outputs=[e["lora_state"], e["lora_active"], e["lora_upload"]],
485
+ )
486
+ e["lora_upload"].change(
487
+ fn=on_lora_upload,
488
+ inputs=[e["lora_upload"], e["lora_strength"]],
489
+ outputs=[e["lora_state"], e["lora_active"], e["lora_preset"]],
490
+ )
491
+ e["lora_strength"].change(
492
+ fn=on_lora_strength_change,
493
+ inputs=[e["lora_state"], e["lora_strength"]],
494
+ outputs=[e["lora_state"], e["lora_active"]],
495
+ )
496
+ e["generate_btn"].click(
497
+ fn=on_edit_click,
498
+ inputs=[
499
+ e["source_audio"],
500
+ e["sub_mode"],
501
+ e["source_lyrics"],
502
+ e["target_lyrics"],
503
+ e["segment_start_s"],
504
+ e["segment_end_s"],
505
+ e["repaint_strength"],
506
+ e["repaint_mode"],
507
+ e["flow_source_caption"],
508
+ e["flow_n_min"],
509
+ e["flow_n_max"],
510
+ e["flow_n_avg"],
511
+ e["lora_state"],
512
+ ],
513
+ outputs=[e["output_audio"], e["output_meta"]],
514
+ )
515
  with gr.Group(visible=False, elem_classes=["ams-tab-pane"]) as pane_lyrics:
516
  gr.Markdown("### ✍️ Lyrics\n\nPlaceholder — implemented in M4.")
517
 
theme.py CHANGED
@@ -786,6 +786,81 @@ main, .contain {{
786
  }}
787
  }}
788
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
789
  /* Hide Gradio footer + the floating "Use via API" / settings panel */
790
  footer {{ display:none !important; }}
791
  .show-api {{ display:none !important; }}
 
786
  }}
787
  }}
788
 
789
+ /* ============================================================
790
+ * Audio upload widget (Cover / Extend / Edit reference inputs)
791
+ * Tagged with ``ams-input-audio`` via elem_classes. Match the dark
792
+ * input chrome so it sits next to the textboxes without contrast
793
+ * jumps; the gr.Audio drop-button gets the same dashed outline as
794
+ * the LoRA upload so users recognise it as a drop-zone.
795
+ * ============================================================ */
796
+ .ams-content .ams-input-audio {{
797
+ background:{SURFACE_STRONG} !important;
798
+ border:1px solid {BORDER} !important;
799
+ border-radius:3px !important;
800
+ padding:8px !important;
801
+ margin-bottom:4px !important;
802
+ }}
803
+ .ams-content .ams-input-audio .empty,
804
+ .ams-content .ams-input-audio [class*="empty"] {{
805
+ min-height:90px !important;
806
+ }}
807
+ .ams-content .ams-input-audio button {{
808
+ background:#000 !important;
809
+ border:1px dashed {BORDER_STRONG} !important;
810
+ border-radius:3px !important;
811
+ color:{INK_MUTED} !important;
812
+ font-family: {FONT_MONO} !important;
813
+ font-size:10px !important;
814
+ letter-spacing:0.04em !important;
815
+ }}
816
+ .ams-content .ams-input-audio button:hover {{
817
+ border-color:{PRIMARY} !important;
818
+ color:{INK} !important;
819
+ }}
820
+ .ams-content .ams-input-audio svg {{
821
+ color:{INK_MUTED} !important;
822
+ opacity:0.7 !important;
823
+ }}
824
+
825
+ /* ============================================================
826
+ * Experimental accordion (Extend / Edit)
827
+ * Reuse the LoRA accordion's visual chrome so the bordered section
828
+ * shape is consistent across all accordions, but visually demote
829
+ * the summary so users can tell these knobs aren't fully wired.
830
+ * ============================================================ */
831
+ .ams-content .ams-experimental {{
832
+ border:1px solid {BORDER} !important;
833
+ border-radius:3px !important;
834
+ background:{SURFACE_STRONG} !important;
835
+ margin-top:10px !important;
836
+ padding:0 !important;
837
+ }}
838
+ .ams-content .ams-experimental > .label-wrap,
839
+ .ams-content .ams-experimental summary,
840
+ .ams-content .ams-experimental > button {{
841
+ font-family: {FONT_MONO} !important;
842
+ font-size:10px !important;
843
+ letter-spacing:0.08em !important;
844
+ text-transform:uppercase !important;
845
+ color:{INK_MUTED} !important;
846
+ padding:10px 12px !important;
847
+ background:transparent !important;
848
+ border:none !important;
849
+ opacity:0.7 !important;
850
+ }}
851
+ .ams-content .ams-experimental > .label-wrap span,
852
+ .ams-content .ams-experimental summary span,
853
+ .ams-content .ams-experimental > button span {{
854
+ color:{INK_MUTED} !important;
855
+ font-family: {FONT_MONO} !important;
856
+ font-size:10px !important;
857
+ letter-spacing:0.08em !important;
858
+ text-transform:uppercase !important;
859
+ }}
860
+ .ams-content .ams-experimental > div:not(.label-wrap):not(summary) {{
861
+ padding:0 12px 12px 12px !important;
862
+ }}
863
+
864
  /* Hide Gradio footer + the floating "Use via API" / settings panel */
865
  footer {{ display:none !important; }}
866
  .show-api {{ display:none !important; }}
ui.py CHANGED
@@ -16,15 +16,91 @@ import lora_stack
16
  import tooltips
17
 
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def build_generate_tab() -> dict[str, gr.components.Component]:
20
  """Generate tab body: 2-column row (form left, output right).
21
 
22
  Includes a single-LoRA picker in a collapsed accordion between the
23
- duration/vocal-mode row and the Generate button. The Apple-Silicon
24
- ACE-Step fork's AceStepHandler only supports one active LoRA at a
25
- time (see ``lora_stack.apply_stack`` for the gory details), so the
26
- UI surfaces a single slot — a preset radio OR a custom upload — and
27
- a strength slider, with a Markdown "active LoRA" display.
28
 
29
  Advanced / LM-planner / DCW accordions are deferred to M2-M4 and
30
  will be added by extending this builder.
@@ -62,78 +138,283 @@ def build_generate_tab() -> dict[str, gr.components.Component]:
62
  info=tooltips.GENERATE_VOCAL,
63
  )
64
 
65
- # --- LoRA accordion (collapsed by default) ---
66
- # Single-LoRA-slot UI: the apple-silicon fork's AceStepHandler
67
- # can only hold one active adapter, so multi-row stacks are
68
- # deferred until upstream lands multi-adapter support.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  with gr.Accordion(
70
- label="LoRA",
71
  open=False,
72
- elem_classes=["ams-lora", "ams-lora-accordion"],
73
  ):
74
  gr.Markdown(
75
- "_Only one LoRA at a time on this build. "
76
- "Picking a preset or uploading a custom file "
77
- "replaces the active LoRA._",
78
  elem_classes=["ams-lora-note"],
79
  )
80
- # Preset choices are read from presets/manifest.json so the
81
- # radio stays in sync with whatever official ACE-Step LoRAs
82
- # are actually published on HuggingFace.
83
- _preset_names = ["None"] + [p["name"] for p in lora_stack.load_presets()]
84
- components["lora_preset"] = gr.Radio(
85
- choices=_preset_names,
86
- value="None",
87
- label="Preset",
88
- elem_classes=["ams-lora-preset"],
89
- interactive=True,
90
- )
91
- components["lora_upload"] = gr.File(
92
- label="Custom LoRA (.safetensors)",
93
- file_types=[".safetensors"],
94
- file_count="single",
95
- elem_classes=["ams-lora-file"],
96
- )
97
- components["lora_strength"] = gr.Slider(
98
  minimum=0.0,
99
- maximum=1.5,
100
  step=0.05,
101
- value=0.95,
102
- label="Strength",
103
- elem_classes=["ams-lora-strength"],
 
 
 
 
 
 
104
  )
105
- components["lora_active"] = gr.Markdown(
106
- "_No LoRA active_",
107
- elem_classes=["ams-lora-active"],
 
108
  )
109
- # Hidden state holding the resolved active LoRA dict
110
- # ``{name, scale, path, sha256}`` so on_generate_click
111
- # can pass it straight to backend.dispatch.
112
- components["lora_state"] = gr.State(None)
113
 
114
  components["generate_btn"] = gr.Button(
115
- "▶ Generate",
116
  variant="primary",
117
  )
118
 
119
- # --- OUTPUT column (right, ~40% width) ---
120
- # elem_classes on each output component give CSS hooks for the
121
- # Brutalist Mono treatment (uppercase mono labels + bordered
122
- # empty-state panels). Without these we'd need to target
123
- # svelte-hashed classes which can change across Gradio versions.
124
  with gr.Column(scale=10):
125
- components["output_audio"] = gr.Audio(
126
- label="Output",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  type="filepath",
128
- interactive=False,
129
- elem_classes=["ams-out", "ams-out-audio"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  )
131
- # gr.JSON renders a dict directly as a syntax-highlighted, expandable
132
- # tree. gr.Code(language="json") refuses dicts — it requires a
133
- # pre-stringified blob — and crashes with "'dict' has no .strip()".
134
- components["output_meta"] = gr.JSON(
135
- label="Metadata",
136
- elem_classes=["ams-out", "ams-out-meta"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
  )
138
 
 
 
 
139
  return components
 
16
  import tooltips
17
 
18
 
19
def _build_lora_accordion(components: dict[str, gr.components.Component]) -> None:
    """Render the collapsed single-slot LoRA picker and register its widgets.

    Shared by every song mode (generate / cover / extend / edit) so the
    picker looks the same everywhere. The Apple-Silicon ACE-Step fork's
    AceStepHandler holds only one adapter at a time (see
    ``lora_stack.apply_stack``), hence a single slot: preset radio OR
    custom upload, plus a strength slider and an "active LoRA" readout.

    Adds these keys to ``components`` in place: ``lora_preset``,
    ``lora_upload``, ``lora_strength``, ``lora_active``, ``lora_state``.
    """
    accordion = gr.Accordion(
        label="LoRA",
        open=False,
        elem_classes=["ams-lora", "ams-lora-accordion"],
    )
    with accordion:
        gr.Markdown(
            "_Only one LoRA at a time on this build. "
            "Picking a preset or uploading a custom file "
            "replaces the active LoRA._",
            elem_classes=["ams-lora-note"],
        )

        # Choices mirror presets/manifest.json (via lora_stack.load_presets)
        # so the radio tracks the officially published ACE-Step LoRAs;
        # "None" leads the list and is the default selection.
        preset_choices = ["None", *(entry["name"] for entry in lora_stack.load_presets())]
        preset_radio = gr.Radio(
            choices=preset_choices,
            value="None",
            label="Preset",
            elem_classes=["ams-lora-preset"],
            interactive=True,
        )
        components["lora_preset"] = preset_radio

        custom_file = gr.File(
            label="Custom LoRA (.safetensors)",
            file_types=[".safetensors"],
            file_count="single",
            elem_classes=["ams-lora-file"],
        )
        components["lora_upload"] = custom_file

        strength = gr.Slider(
            minimum=0.0,
            maximum=1.5,
            step=0.05,
            value=0.95,
            label="Strength",
            elem_classes=["ams-lora-strength"],
        )
        components["lora_strength"] = strength

        active_readout = gr.Markdown(
            "_No LoRA active_",
            elem_classes=["ams-lora-active"],
        )
        components["lora_active"] = active_readout

        # Invisible state carrying the resolved LoRA dict
        # ``{name, scale, path, sha256}`` for the click handler to forward
        # to backend.dispatch.
        components["lora_state"] = gr.State(None)
73
+
74
+
75
def _build_output_panel(components: dict[str, gr.components.Component]) -> None:
    """Render the shared Output player and Metadata viewer.

    Adds ``output_audio`` (a read-only ``gr.Audio``) and ``output_meta``
    (a ``gr.JSON``) to ``components`` in place.

    Both carry explicit ``elem_classes`` so the Brutalist Mono CSS
    (uppercase mono labels, bordered empty-state panels) has stable hooks
    rather than svelte-hashed class names that can change between Gradio
    versions.

    ``gr.JSON`` is used for metadata because it renders a dict directly
    as an expandable, highlighted tree; ``gr.Code(language="json")``
    needs a pre-stringified blob and fails on a dict with
    "'dict' has no .strip()".
    """
    audio_player = gr.Audio(
        label="Output",
        type="filepath",
        interactive=False,
        elem_classes=["ams-out", "ams-out-audio"],
    )
    components["output_audio"] = audio_player

    metadata_view = gr.JSON(
        label="Metadata",
        elem_classes=["ams-out", "ams-out-meta"],
    )
    components["output_meta"] = metadata_view
97
+
98
+
99
  def build_generate_tab() -> dict[str, gr.components.Component]:
100
  """Generate tab body: 2-column row (form left, output right).
101
 
102
  Includes a single-LoRA picker in a collapsed accordion between the
103
+ duration/vocal-mode row and the Generate button.
 
 
 
 
104
 
105
  Advanced / LM-planner / DCW accordions are deferred to M2-M4 and
106
  will be added by extending this builder.
 
138
  info=tooltips.GENERATE_VOCAL,
139
  )
140
 
141
+ _build_lora_accordion(components)
142
+
143
+ components["generate_btn"] = gr.Button(
144
+ "▶ Generate",
145
+ variant="primary",
146
+ )
147
+
148
+ # --- OUTPUT column (right, ~40% width) ---
149
+ with gr.Column(scale=10):
150
+ _build_output_panel(components)
151
+
152
+ return components
153
+
154
+
155
def build_cover_tab() -> dict[str, gr.components.Component]:
    """Build the Cover tab: reference audio plus new lyrics in, cover out.

    Corresponds to ACE-Step's ``task_type="cover"``: the uploaded file
    feeds ``reference_audio`` and the strength slider drives
    ``audio_cover_strength``. A higher strength stays closer to the
    reference; a lower one lets the new prompt/lyrics shift the timbre.

    Returns:
        Mapping of component key to Gradio component, covering the form
        inputs, the LoRA picker, ``generate_btn`` and the output panel.
    """
    widgets: dict[str, gr.components.Component] = {}

    with gr.Row():
        # --- FORM column (left) ---
        with gr.Column(scale=13):
            widgets["ref_audio"] = gr.Audio(
                label="Reference audio",
                type="filepath",
                sources=["upload"],
                elem_classes=["ams-input-audio"],
            )
            widgets["prompt"] = gr.Textbox(
                label="New style prompt (optional)",
                placeholder="faster, more aggressive leads",
                lines=2,
            )
            widgets["lyrics"] = gr.Textbox(
                label="New lyrics",
                placeholder="[verse] new lyrics over the reference style",
                lines=5,
            )

            # Duration and cover strength share one row.
            with gr.Row():
                widgets["duration_s"] = gr.Slider(
                    minimum=5,
                    maximum=240,
                    step=5,
                    value=30,
                    label="Duration (s)",
                )
                widgets["audio_cover_strength"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=0.93,
                    label="Cover strength",
                    info="Higher = closer to reference. Lower = more drift.",
                )

            _build_lora_accordion(widgets)

            widgets["generate_btn"] = gr.Button("▶ Generate cover", variant="primary")

        # --- OUTPUT column (right) ---
        with gr.Column(scale=10):
            _build_output_panel(widgets)

    return widgets
210
+
211
+
212
def build_extend_tab() -> dict[str, gr.components.Component]:
    """Build the Extend tab: seed audio plus extension prompt in, longer song out.

    Corresponds to ACE-Step's ``task_type="repaint"`` with ``src_audio``
    set to the uploaded seed and the repaint window placed past the end
    of the seed, so new audio is painted after it.

    The repaint knobs (``repaint_mode``, ``repaint_strength``,
    ``latent_crossfade_frames``, ``chunk_mask_mode``, ``wav_crossfade_s``)
    are marked experimental: the installed ACE-Step ``GenerationParams``
    dataclass does not expose them yet, so the UI only captures them for
    when upstream adds the fields.

    Returns:
        Mapping of component key to Gradio component, covering the form
        inputs, the LoRA picker, ``generate_btn`` and the output panel.
    """
    widgets: dict[str, gr.components.Component] = {}

    with gr.Row():
        # --- FORM column (left) ---
        with gr.Column(scale=13):
            widgets["seed_audio"] = gr.Audio(
                label="Seed audio",
                type="filepath",
                sources=["upload"],
                elem_classes=["ams-input-audio"],
            )
            widgets["extra_prompt"] = gr.Textbox(
                label="Extension prompt",
                placeholder="build to climax, layered acid leads",
                lines=2,
            )
            widgets["extension_lyrics"] = gr.Textbox(
                label="Extension lyrics (optional)",
                placeholder="[bridge] the drop is coming...",
                lines=4,
            )

            # Extra duration and WAV crossfade share one row.
            with gr.Row():
                widgets["extra_duration_s"] = gr.Slider(
                    minimum=5,
                    maximum=120,
                    step=5,
                    value=60,
                    label="Extra duration (s)",
                )
                widgets["wav_crossfade_s"] = gr.Slider(
                    minimum=0.0,
                    maximum=5.0,
                    step=0.1,
                    value=2.0,
                    label="WAV crossfade (s)",
                    info="Experimental — not yet wired in this acestep build.",
                )

            repaint_section = gr.Accordion(
                "Repaint params (experimental)",
                open=False,
                elem_classes=["ams-experimental"],
            )
            with repaint_section:
                gr.Markdown(
                    "_These knobs are captured in the request but the installed "
                    "ACE-Step dataclass doesn't expose them yet._",
                    elem_classes=["ams-lora-note"],
                )
                widgets["repaint_mode"] = gr.Dropdown(
                    choices=["balanced", "left", "right"],
                    value="balanced",
                    label="Repaint mode",
                )
                widgets["repaint_strength"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.5,
                    label="Repaint strength",
                )
                widgets["latent_crossfade_frames"] = gr.Slider(
                    minimum=0,
                    maximum=30,
                    step=1,
                    value=10,
                    label="Latent crossfade frames",
                )
                widgets["chunk_mask_mode"] = gr.Dropdown(
                    choices=["auto", "manual"],
                    value="auto",
                    label="Chunk mask",
                )

            _build_lora_accordion(widgets)

            widgets["generate_btn"] = gr.Button("▶ Extend", variant="primary")

        # --- OUTPUT column (right) ---
        with gr.Column(scale=10):
            _build_output_panel(widgets)

    return widgets
308
+
309
+
310
def build_edit_tab() -> dict[str, gr.components.Component]:
    """Build the Edit tab: source audio, a segment and target lyrics in, edit out.

    Offers two sub-modes:

    - ``repaint`` (default): regenerate [segment_start_s, segment_end_s]
      with ACE-Step's repaint task_type. The two bounds travel through
      the params dict to ``repainting_start`` / ``repainting_end`` on the
      pipeline side.
    - ``flow_edit``: caption-to-caption morph. The installed ACE-Step
      ``GenerationParams`` has no ``flow_edit_*`` fields, so this
      sub-mode falls back to a repaint pass with lower
      ``audio_cover_strength``; the flow knobs are captured for when
      upstream adds native support.

    Returns:
        Mapping of component key to Gradio component, covering the form
        inputs, the LoRA picker, ``generate_btn`` and the output panel.
    """
    widgets: dict[str, gr.components.Component] = {}

    with gr.Row():
        # --- FORM column (left) ---
        with gr.Column(scale=13):
            widgets["source_audio"] = gr.Audio(
                label="Source audio",
                type="filepath",
                sources=["upload"],
                elem_classes=["ams-input-audio"],
            )
            widgets["sub_mode"] = gr.Radio(
                choices=["repaint", "flow_edit"],
                value="repaint",
                label="Edit sub-mode",
                info=(
                    "repaint: regenerate the segment from new lyrics. "
                    "flow_edit: morph caption-to-caption (experimental)."
                ),
            )
            widgets["source_lyrics"] = gr.Textbox(label="Source lyrics", lines=3)
            widgets["target_lyrics"] = gr.Textbox(
                label="Target lyrics",
                placeholder="[chorus] new chorus replaces the old",
                lines=3,
            )

            # Segment bounds sit side by side.
            with gr.Row():
                widgets["segment_start_s"] = gr.Number(
                    value=0.0,
                    label="Segment start (s)",
                    precision=1,
                )
                widgets["segment_end_s"] = gr.Number(
                    value=30.0,
                    label="Segment end (s)",
                    precision=1,
                )

            repaint_section = gr.Accordion(
                "Repaint options (experimental)",
                open=False,
                elem_classes=["ams-experimental"],
            )
            with repaint_section:
                gr.Markdown(
                    "_These knobs are captured in the request but the installed "
                    "ACE-Step dataclass doesn't expose them yet._",
                    elem_classes=["ams-lora-note"],
                )
                widgets["repaint_strength"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    value=0.5,
                    label="Repaint strength",
                )
                widgets["repaint_mode"] = gr.Dropdown(
                    choices=["balanced", "left", "right"],
                    value="balanced",
                    label="Repaint mode",
                )

            flow_section = gr.Accordion(
                "Flow-morph options (experimental)",
                open=False,
                elem_classes=["ams-experimental"],
            )
            with flow_section:
                gr.Markdown(
                    "_flow_edit sub-mode currently falls back to a repaint pass with "
                    "lower audio_cover_strength. flow-specific params are captured "
                    "but not yet wired._",
                    elem_classes=["ams-lora-note"],
                )
                widgets["flow_source_caption"] = gr.Textbox(
                    label="Source caption",
                    placeholder="acoustic ballad, gentle piano",
                )
                widgets["flow_n_min"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.0,
                    step=0.05,
                    label="n_min",
                )
                widgets["flow_n_max"] = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=1.0,
                    step=0.05,
                    label="n_max",
                )
                widgets["flow_n_avg"] = gr.Slider(
                    minimum=1,
                    maximum=5,
                    value=1,
                    step=1,
                    label="n_avg",
                )

            _build_lora_accordion(widgets)

            widgets["generate_btn"] = gr.Button("▶ Apply edit", variant="primary")

        # --- OUTPUT column (right) ---
        with gr.Column(scale=10):
            _build_output_panel(widgets)

    return widgets