Spaces:

techfreakworm
/

ACE-Music-Studio

Running on Zero

App Files Community

techfreakworm committed 2 days ago

Commit

0ef5d8d

unverified ·

1 Parent(s): 7388985

feat(ui): expand tooltips.py + apply info= to cover/extend/edit/lyrics tabs

Browse files

Adds COVER_*, EXTEND_*, EDIT_*, LYRICS_*, LORA_*, and POST_* tooltip
strings to the central tooltips module and wires them into the
corresponding ui.py builders. gr.Audio / gr.File widgets don't accept
info= in Gradio 6.14 — those are skipped (visible labels carry the
intent already). gr.Button likewise has no info= so the post-process
action row keeps its descriptive button labels as the affordance.

Files changed (2) hide show

tooltips.py +45 -0
ui.py +22 -6

tooltips.py CHANGED Viewed

@@ -1,6 +1,51 @@
 """Centralised tooltip / `info=` strings — single source of truth."""
 GENERATE_PROMPT = "Describe the song. Genre, instruments, tempo, mood."
 GENERATE_LYRICS = "Use [verse] [chorus] [bridge] tags. Open the Lyrics tab to draft with Qwen 2.5."
 GENERATE_DURATION = "Output length in seconds. Longer outputs cost more compute."
 GENERATE_VOCAL = "With vocals: full song. Instrumental: no singing, just music."

 """Centralised tooltip / `info=` strings — single source of truth."""
+# --- Generate tab ---
 GENERATE_PROMPT = "Describe the song. Genre, instruments, tempo, mood."
 GENERATE_LYRICS = "Use [verse] [chorus] [bridge] tags. Open the Lyrics tab to draft with Qwen 2.5."
 GENERATE_DURATION = "Output length in seconds. Longer outputs cost more compute."
 GENERATE_VOCAL = "With vocals: full song. Instrumental: no singing, just music."
+# --- Cover tab ---
+COVER_REF_AUDIO = "Reference clip (≤ 60 s recommended). The first ~12 s drives the style most strongly."
+COVER_PROMPT = "Override the reference's vibe with a new style direction. Leave blank to inherit fully."
+COVER_LYRICS = "New lyrics sung over the reference style. Use [verse] [chorus] tags."
+COVER_DURATION = "Length of the generated cover."
+COVER_STRENGTH = "0.0 = ignore reference. 1.0 = clone reference. 0.93 is a balanced default."
+# --- Extend tab ---
+EXTEND_SEED_AUDIO = "The song to continue. Last few seconds influence the extension most."
+EXTEND_PROMPT = "Style hint for what should come next."
+EXTEND_LYRICS = "Lyrics for the extension (optional — leave blank for instrumental continuation)."
+EXTEND_DURATION = "Extra time to generate after the seed."
+EXTEND_CROSSFADE = "Smooth the seam between seed and extension (experimental — not yet wired in the installed acestep build)."
+# --- Edit tab ---
+EDIT_SOURCE_AUDIO = "The existing song. The segment you select will be regenerated."
+EDIT_SUB_MODE = "repaint: rewrite the segment with new lyrics. flow_edit: morph the caption (experimental — falls back to a low-strength repaint in this build)."
+EDIT_SOURCE_LYRICS = "Original lyrics for context."
+EDIT_TARGET_LYRICS = "What the new segment should sing."
+EDIT_SEGMENT_START = "Where the editable segment begins (seconds into the source)."
+EDIT_SEGMENT_END = "Where the editable segment ends (seconds into the source)."
+# --- Lyrics tab ---
+LYRICS_BRIEF = "Describe the song. Tone, mood, references, lines to avoid. Free-form prose."
+LYRICS_STRUCTURE = "Section sequence. Comma-separated. The LM honors this layout."
+LYRICS_LANGUAGE = "Output language for the lyrics. Qwen 2.5 7B handles 10+ languages well."
+LYRICS_TONE = "Comma-separated descriptors. Influences word choice and rhythm."
+LYRICS_TEMPERATURE = "0.0 = deterministic. 1.0 = creative. 0.85 balances both."
+LYRICS_TOP_P = "Nucleus sampling. 0.9 keeps coherence with a bit of variety."
+LYRICS_TOP_K = "Limits the candidate token pool. 40 is a good default; 0 disables."
+LYRICS_MAX_TOKENS = "Generation budget. 600 tokens ≈ 30 lines."
+# --- LoRA accordion (shared across all song modes) ---
+LORA_PRESET = "Pick an official ACE-Step LoRA — downloads from Hugging Face on first use."
+LORA_UPLOAD = (
+    "Upload any compatible .safetensors LoRA. Header is validated against ACE-Step 1.5 XL DiT modules."
+)
+LORA_STRENGTH = "0.0 = LoRA disabled. 1.0 = full effect. > 1.0 = overdrive (may degrade quality)."
+# --- Post-process action row ---
+POST_STEMS = "Run Demucs (htdemucs_ft) to split into vocals / drums / bass / other."
+POST_NORMALISE = "Normalise output to -14 LUFS (streaming spec)."
+POST_MP3 = "Export the current output as a 320 kbps stereo MP3."

ui.py CHANGED Viewed

@@ -60,6 +60,7 @@ def _build_lora_accordion(components: dict[str, gr.components.Component]) -> Non
             step=0.05,
             value=0.95,
             label="Strength",
             elem_classes=["ams-lora-strength"],
         )
         components["lora_active"] = gr.Markdown(
@@ -212,11 +213,13 @@ def build_cover_tab() -> dict[str, gr.components.Component]:
                 label="New style prompt (optional)",
                 placeholder="faster, more aggressive leads",
                 lines=2,
             )
             components["lyrics"] = gr.Textbox(
                 label="New lyrics",
                 placeholder="[verse] new lyrics over the reference style",
                 lines=5,
             )
             with gr.Row():
                 components["duration_s"] = gr.Slider(
@@ -225,6 +228,7 @@ def build_cover_tab() -> dict[str, gr.components.Component]:
                     step=5,
                     value=30,
                     label="Duration (s)",
                 )
                 components["audio_cover_strength"] = gr.Slider(
                     minimum=0.0,
@@ -232,7 +236,7 @@ def build_cover_tab() -> dict[str, gr.components.Component]:
                     step=0.01,
                     value=0.93,
                     label="Cover strength",
-                    info="Higher = closer to reference. Lower = more drift.",
                 )
             _build_lora_accordion(components)
@@ -275,11 +279,13 @@ def build_extend_tab() -> dict[str, gr.components.Component]:
                 label="Extension prompt",
                 placeholder="build to climax, layered acid leads",
                 lines=2,
             )
             components["extension_lyrics"] = gr.Textbox(
                 label="Extension lyrics (optional)",
                 placeholder="[bridge] the drop is coming...",
                 lines=4,
             )
             with gr.Row():
                 components["extra_duration_s"] = gr.Slider(
@@ -288,6 +294,7 @@ def build_extend_tab() -> dict[str, gr.components.Component]:
                     step=5,
                     value=60,
                     label="Extra duration (s)",
                 )
                 components["wav_crossfade_s"] = gr.Slider(
                     minimum=0.0,
@@ -295,7 +302,7 @@ def build_extend_tab() -> dict[str, gr.components.Component]:
                     step=0.1,
                     value=2.0,
                     label="WAV crossfade (s)",
-                    info="Experimental — not yet wired in this acestep build.",
                 )
             with gr.Accordion(
@@ -374,30 +381,31 @@ def build_edit_tab() -> dict[str, gr.components.Component]:
                 choices=["repaint", "flow_edit"],
                 value="repaint",
                 label="Edit sub-mode",
-                info=(
-                    "repaint: regenerate the segment from new lyrics. "
-                    "flow_edit: morph caption-to-caption (experimental)."
-                ),
             )
             components["source_lyrics"] = gr.Textbox(
                 label="Source lyrics",
                 lines=3,
             )
             components["target_lyrics"] = gr.Textbox(
                 label="Target lyrics",
                 placeholder="[chorus] new chorus replaces the old",
                 lines=3,
             )
             with gr.Row():
                 components["segment_start_s"] = gr.Number(
                     value=0.0,
                     label="Segment start (s)",
                     precision=1,
                 )
                 components["segment_end_s"] = gr.Number(
                     value=30.0,
                     label="Segment end (s)",
                     precision=1,
                 )
             with gr.Accordion(
@@ -484,16 +492,19 @@ def build_lyrics_tab() -> dict[str, gr.components.Component]:
                 label="Brief",
                 lines=4,
                 placeholder=("Describe the song. Tone, mood, references, specific images, lines to avoid…"),
             )
             with gr.Row():
                 c["structure"] = gr.Textbox(
                     label="Structure",
                     value="intro, verse, chorus, verse, chorus, bridge, chorus, outro",
                 )
                 c["language"] = gr.Dropdown(
                     choices=["en", "zh", "ja", "ko", "es", "fr", "de"],
                     value="en",
                     label="Language",
                 )
             with gr.Row():
                 c["verse_lines"] = gr.Slider(
@@ -520,6 +531,7 @@ def build_lyrics_tab() -> dict[str, gr.components.Component]:
             c["tone"] = gr.Textbox(
                 label="Tone / mood",
                 placeholder="euphoric, hypnotic, transcendent, not cheesy",
             )
             c["rhyme"] = gr.Radio(
                 choices=["strict", "loose", "none"],
@@ -537,6 +549,7 @@ def build_lyrics_tab() -> dict[str, gr.components.Component]:
                     value=0.85,
                     step=0.05,
                     label="Temperature",
                 )
                 c["top_p"] = gr.Slider(
                     minimum=0.0,
@@ -544,6 +557,7 @@ def build_lyrics_tab() -> dict[str, gr.components.Component]:
                     value=0.9,
                     step=0.05,
                     label="Top-p",
                 )
                 c["top_k"] = gr.Slider(
                     minimum=0,
@@ -551,6 +565,7 @@ def build_lyrics_tab() -> dict[str, gr.components.Component]:
                     value=40,
                     step=1,
                     label="Top-k",
                 )
                 c["max_new_tokens"] = gr.Slider(
                     minimum=100,
@@ -558,6 +573,7 @@ def build_lyrics_tab() -> dict[str, gr.components.Component]:
                     value=600,
                     step=50,
                     label="Max new tokens",
                 )
                 c["seed"] = gr.Number(
                     value=42,

             step=0.05,
             value=0.95,
             label="Strength",
+            info=tooltips.LORA_STRENGTH,
             elem_classes=["ams-lora-strength"],
         )
         components["lora_active"] = gr.Markdown(
                 label="New style prompt (optional)",
                 placeholder="faster, more aggressive leads",
                 lines=2,
+                info=tooltips.COVER_PROMPT,
             )
             components["lyrics"] = gr.Textbox(
                 label="New lyrics",
                 placeholder="[verse] new lyrics over the reference style",
                 lines=5,
+                info=tooltips.COVER_LYRICS,
             )
             with gr.Row():
                 components["duration_s"] = gr.Slider(
                     step=5,
                     value=30,
                     label="Duration (s)",
+                    info=tooltips.COVER_DURATION,
                 )
                 components["audio_cover_strength"] = gr.Slider(
                     minimum=0.0,
                     step=0.01,
                     value=0.93,
                     label="Cover strength",
+                    info=tooltips.COVER_STRENGTH,
                 )
             _build_lora_accordion(components)
                 label="Extension prompt",
                 placeholder="build to climax, layered acid leads",
                 lines=2,
+                info=tooltips.EXTEND_PROMPT,
             )
             components["extension_lyrics"] = gr.Textbox(
                 label="Extension lyrics (optional)",
                 placeholder="[bridge] the drop is coming...",
                 lines=4,
+                info=tooltips.EXTEND_LYRICS,
             )
             with gr.Row():
                 components["extra_duration_s"] = gr.Slider(
                     step=5,
                     value=60,
                     label="Extra duration (s)",
+                    info=tooltips.EXTEND_DURATION,
                 )
                 components["wav_crossfade_s"] = gr.Slider(
                     minimum=0.0,
                     step=0.1,
                     value=2.0,
                     label="WAV crossfade (s)",
+                    info=tooltips.EXTEND_CROSSFADE,
                 )
             with gr.Accordion(
                 choices=["repaint", "flow_edit"],
                 value="repaint",
                 label="Edit sub-mode",
+                info=tooltips.EDIT_SUB_MODE,
             )
             components["source_lyrics"] = gr.Textbox(
                 label="Source lyrics",
                 lines=3,
+                info=tooltips.EDIT_SOURCE_LYRICS,
             )
             components["target_lyrics"] = gr.Textbox(
                 label="Target lyrics",
                 placeholder="[chorus] new chorus replaces the old",
                 lines=3,
+                info=tooltips.EDIT_TARGET_LYRICS,
             )
             with gr.Row():
                 components["segment_start_s"] = gr.Number(
                     value=0.0,
                     label="Segment start (s)",
                     precision=1,
+                    info=tooltips.EDIT_SEGMENT_START,
                 )
                 components["segment_end_s"] = gr.Number(
                     value=30.0,
                     label="Segment end (s)",
                     precision=1,
+                    info=tooltips.EDIT_SEGMENT_END,
                 )
             with gr.Accordion(
                 label="Brief",
                 lines=4,
                 placeholder=("Describe the song. Tone, mood, references, specific images, lines to avoid…"),
+                info=tooltips.LYRICS_BRIEF,
             )
             with gr.Row():
                 c["structure"] = gr.Textbox(
                     label="Structure",
                     value="intro, verse, chorus, verse, chorus, bridge, chorus, outro",
+                    info=tooltips.LYRICS_STRUCTURE,
                 )
                 c["language"] = gr.Dropdown(
                     choices=["en", "zh", "ja", "ko", "es", "fr", "de"],
                     value="en",
                     label="Language",
+                    info=tooltips.LYRICS_LANGUAGE,
                 )
             with gr.Row():
                 c["verse_lines"] = gr.Slider(
             c["tone"] = gr.Textbox(
                 label="Tone / mood",
                 placeholder="euphoric, hypnotic, transcendent, not cheesy",
+                info=tooltips.LYRICS_TONE,
             )
             c["rhyme"] = gr.Radio(
                 choices=["strict", "loose", "none"],
                     value=0.85,
                     step=0.05,
                     label="Temperature",
+                    info=tooltips.LYRICS_TEMPERATURE,
                 )
                 c["top_p"] = gr.Slider(
                     minimum=0.0,
                     value=0.9,
                     step=0.05,
                     label="Top-p",
+                    info=tooltips.LYRICS_TOP_P,
                 )
                 c["top_k"] = gr.Slider(
                     minimum=0,
                     value=40,
                     step=1,
                     label="Top-k",
+                    info=tooltips.LYRICS_TOP_K,
                 )
                 c["max_new_tokens"] = gr.Slider(
                     minimum=100,
                     value=600,
                     step=50,
                     label="Max new tokens",
+                    info=tooltips.LYRICS_MAX_TOKENS,
                 )
                 c["seed"] = gr.Number(
                     value=42,