| import spaces |
| import logging |
| from datetime import datetime |
| from pathlib import Path |
| import gradio as gr |
| import torch |
| import torchaudio |
| import os |
| import requests |
| from transformers import pipeline |
| import tempfile |
| import numpy as np |
| from einops import rearrange |
| import cv2 |
| from scipy.io import wavfile |
| import librosa |
| import json |
| from typing import Optional, Tuple, List |
| import atexit |
|
|
| |
# NOTE(review): set before any model is loaded; presumably relaxes a
# deserialization safety check inside transformers -- confirm it is still needed.
os.environ["TRANSFORMERS_ALLOW_UNSAFE_DESERIALIZATION"] = "1"


try:
    import mmaudio
except ImportError:
    # Fall back to installing the local checkout. Use the interpreter that is
    # running this script (a bare `pip` on PATH may belong to a different
    # environment) and report a failed install instead of ignoring it.
    import importlib
    import subprocess
    import sys

    _pip_result = subprocess.run([sys.executable, "-m", "pip", "install", "-e", "."])
    if _pip_result.returncode != 0:
        logging.warning("'pip install -e .' exited with status %s", _pip_result.returncode)
    # Let the import system notice anything that was just installed.
    importlib.invalidate_caches()
    import mmaudio
|
|
| from mmaudio.eval_utils import (ModelConfig, all_model_cfg, generate, load_video, make_video, |
| setup_eval_logging) |
| from mmaudio.model.flow_matching import FlowMatching |
| from mmaudio.model.networks import MMAudio, get_my_mmaudio |
| from mmaudio.model.sequence_config import SequenceConfig |
| from mmaudio.model.utils.features_utils import FeaturesUtils |
|
|
| |
# Root logger: timestamp, logger name, level and message at INFO and above.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
log = logging.getLogger()


# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    # Allow TF32 matmuls/convolutions and enable cuDNN autotuning on GPU.
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
    torch.backends.cudnn.benchmark = True


# Dtype that the networks are cast to when they are loaded.
dtype = torch.bfloat16
|
|
| |
# Directory later passed to `demo.launch(allowed_paths=...)`.
output_dir = Path('./output/gradio')

# Pick the 'large_44k_v2' configuration and fetch its files if they are missing.
model: ModelConfig = all_model_cfg['large_44k_v2']
model.download_if_needed()

setup_eval_logging()
|
|
| |
def _build_translator(**extra_kwargs):
    """Create the Korean-to-English translation pipeline on the CPU."""
    return pipeline("translation",
                    model="Helsinki-NLP/opus-mt-ko-en",
                    device="cpu",
                    **extra_kwargs)


# First try with the explicit tokenizer/remote-code options; if that fails,
# retry with library defaults; if that fails too, run without a translator.
try:
    translator = _build_translator(use_fast=True, trust_remote_code=False)
except Exception as e:
    log.warning(f"Failed to load translation model with safetensors: {e}")
    try:
        translator = _build_translator()
    except Exception as e2:
        log.error(f"Failed to load translation model: {e2}")
        translator = None
|
|
# SECURITY: a credential committed to source is visible to everyone who can
# read the file. Prefer supplying it through the PIXABAY_API_KEY environment
# variable; the literal is kept only as a backward-compatible fallback and
# should be rotated and removed.
PIXABAY_API_KEY = os.environ.get("PIXABAY_API_KEY") or "33492762-a28a596ec4f286f84cd328b17"
|
|
def cleanup_temp_files():
    """Best-effort removal of generated media from the system temp directory.

    Only regular files whose name starts with the default ``tempfile`` prefix
    and ends in ``.mp4`` or ``.flac`` are removed. That matches the files this
    app creates with ``tempfile.NamedTemporaryFile(suffix=...)`` while leaving
    unrelated media that other programs keep in the shared temp directory
    alone. Failures to list or delete are ignored so that interpreter shutdown
    is never interrupted by cleanup.
    """
    temp_dir = tempfile.gettempdir()
    prefix = tempfile.gettempprefix()
    try:
        with os.scandir(temp_dir) as listing:
            entries = list(listing)
    except OSError:
        # Temp directory vanished or is unreadable: nothing to clean.
        return

    for entry in entries:
        name = entry.name
        if not (name.startswith(prefix) and name.endswith(('.mp4', '.flac'))):
            continue
        try:
            # Skip directories and symlinks that merely look like outputs.
            if entry.is_file(follow_symlinks=False):
                os.remove(entry.path)
        except OSError:
            # Already gone or not ours to delete; keep going with the rest.
            continue


atexit.register(cleanup_temp_files)
|
|
def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
    """Build the MMAudio network and its feature utilities on ``device``.

    Returns:
        ``(net, feature_utils, seq_cfg)``: the network with weights loaded from
        ``model.model_path``, the ``FeaturesUtils`` instance built from the
        checkpoints named in ``model``, and ``model.seq_cfg``. Both modules are
        moved to ``device``/``dtype`` and put in eval mode.
    """
    from contextlib import nullcontext

    # torch.cuda.device() only accepts CUDA devices and raises ValueError for
    # the CPU fallback, so only enter it when we are actually running on CUDA.
    device_ctx = torch.cuda.device(device) if device.type == "cuda" else nullcontext()

    with device_ctx:
        seq_cfg = model.seq_cfg
        net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
        net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
        log.info(f'Loaded weights from {model.model_path}')

        feature_utils = FeaturesUtils(
            tod_vae_ckpt=model.vae_path,
            synchformer_ckpt=model.synchformer_ckpt,
            enable_conditions=True,
            mode=model.mode,
            bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
            need_vae_encoder=False
        ).to(device, dtype).eval()

    return net, feature_utils, seq_cfg


# Loaded once at import time and shared by every request handler below.
net, feature_utils, seq_cfg = get_model()
|
|
def _contains_hangul(text):
    """Return True if *text* has a Hangul compatibility jamo or syllable.

    Checks U+3131-U+318E and U+AC00-U+D7A3 only. A single span from U+3131 to
    U+D7A3 would also match CJK ideographs, so Chinese or Japanese text would
    be treated as Korean.
    """
    return any(
        0x3131 <= code <= 0x318E or 0xAC00 <= code <= 0xD7A3
        for code in map(ord, text)
    )


def translate_prompt(text):
    """Translate *text* to English when it contains Korean characters.

    Args:
        text: Prompt entered by the user; may be empty or ``None``.

    Returns:
        The translated string when a translator is loaded and *text* contains
        Hangul; otherwise *text* unchanged. Any error during translation is
        logged and *text* is returned as-is.
    """
    try:
        if translator is None or not text or not _contains_hangul(text):
            return text
        with torch.no_grad():
            return translator(text)[0]['translation_text']
    except Exception as e:
        logging.error(f"Translation error: {e}")
        return text
|
|
@torch.no_grad()
def search_videos(query):
    """Translate *query* if needed and return matching Pixabay video URLs.

    Any failure is logged and reported to the caller as an empty list.
    """
    try:
        english_query = translate_prompt(query)
        found = search_pixabay_videos(english_query, PIXABAY_API_KEY)
    except Exception as e:
        logging.error(f"Video search error: {e}")
        found = []
    return found
|
|
# Renditions to try for each hit, best quality first.
_PIXABAY_RENDITIONS = ("large", "medium", "small", "tiny")


def _pick_video_url(hit):
    """Return the first non-empty rendition URL of a Pixabay hit, or None."""
    renditions = hit.get("videos") or {}
    for size in _PIXABAY_RENDITIONS:
        url = (renditions.get(size) or {}).get("url")
        if url:
            return url
    return None


def search_pixabay_videos(query, api_key, per_page=40, timeout=10):
    """Query the Pixabay video API and return a list of video URLs.

    Args:
        query: Search keywords.
        api_key: Pixabay API key.
        per_page: Number of results to request (default 40).
        timeout: Seconds to wait for the HTTP request before giving up, so a
            stalled connection cannot block the handler indefinitely.

    Returns:
        One URL per hit that has any usable rendition. The ``large`` rendition
        is preferred and smaller ones are used when its URL is empty or
        missing. Returns an empty list on a non-200 response or on any error
        (which is logged).
    """
    try:
        response = requests.get(
            "https://pixabay.com/api/videos/",
            params={"key": api_key, "q": query, "per_page": per_page},
            timeout=timeout,
        )
        if response.status_code != 200:
            return []
        hits = response.json().get('hits', [])
        return [url for url in map(_pick_video_url, hits) if url]
    except Exception as e:
        logging.error(f"Pixabay API error: {e}")
        return []
|
|
@spaces.GPU
@torch.inference_mode()
def video_to_audio(video: gr.Video, prompt: str, negative_prompt: str, seed: int, num_steps: int,
                   cfg_strength: float, duration: float):
    """Generate audio for *video* and return the muxed result plus a status log.

    Args:
        video: Input video as delivered by the ``gr.Video`` component.
        prompt: Text description of the wanted audio (Korean is translated).
        negative_prompt: Text description of what to avoid (Korean is translated).
        seed: Seed for the random generator; coerced to ``int``.
        num_steps: Number of Euler flow-matching steps; coerced to ``int``.
        cfg_strength: Guidance strength passed to ``generate``.
        duration: Requested duration in seconds; replaced by the value that
            ``load_video`` returns.

    Returns:
        ``(video_save_path, info_log)``: path of a temporary ``.mp4`` holding
        the video with the generated audio, and a human-readable summary.

    Raises:
        gr.Error: If no video was provided.
    """
    if not video:
        raise gr.Error("Please upload a video first.")

    # The Number inputs are declared without a precision, so they may arrive
    # as floats; the generator seed and the step count must be integers.
    seed = int(seed)
    num_steps = int(num_steps)

    prompt = translate_prompt(prompt)
    negative_prompt = translate_prompt(negative_prompt)

    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    clip_frames, sync_frames, duration = load_video(video, duration)
    clip_frames = clip_frames.unsqueeze(0)
    sync_frames = sync_frames.unsqueeze(0)
    # NOTE(review): seq_cfg and net are module-level objects shared by all
    # requests; concurrent calls with different durations would race here.
    seq_cfg.duration = duration
    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=feature_utils,
                      net=net,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    audio = audios.float().cpu()[0]

    # Reserve a path and close our handle before make_video writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4') as tmp:
        video_save_path = tmp.name
    make_video(video,
               video_save_path,
               audio,
               sampling_rate=seq_cfg.sampling_rate,
               duration_sec=seq_cfg.duration)

    rule = '=' * 50
    info_log = "\n".join([
        "✅ VIDEO TO AUDIO COMPLETE!",
        rule,
        "🎬 Video Info:",
        f"• Duration: {duration:.2f} seconds",
        rule,
        "⚙️ Generation Settings:",
        f"• Seed: {seed}",
        f"• Steps: {num_steps}",
        f"• CFG Strength: {cfg_strength}",
        rule,
        "📝 Prompts:",
        f"• Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}",
        f"• Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}",
        rule,
        "💾 Video with audio ready!",
    ])

    return video_save_path, info_log
|
|
@spaces.GPU
@torch.inference_mode()
def text_to_audio(prompt: str, negative_prompt: str, seed: int, num_steps: int, cfg_strength: float,
                  duration: float):
    """Generate audio from a text prompt and return the file plus a status log.

    Args:
        prompt: Text description of the wanted audio (Korean is translated).
        negative_prompt: Text description of what to avoid (Korean is translated).
        seed: Seed for the random generator; coerced to ``int``.
        num_steps: Number of Euler flow-matching steps; coerced to ``int``.
        cfg_strength: Guidance strength passed to ``generate``.
        duration: Requested audio duration in seconds.

    Returns:
        ``(audio_save_path, info_log)``: path of a temporary ``.flac`` file
        with the generated audio, and a human-readable summary.
    """
    # The Number inputs are declared without a precision, so they may arrive
    # as floats; the generator seed and the step count must be integers.
    seed = int(seed)
    num_steps = int(num_steps)

    prompt = translate_prompt(prompt)
    negative_prompt = translate_prompt(negative_prompt)

    rng = torch.Generator(device=device)
    rng.manual_seed(seed)
    fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)

    # No visual conditioning in text-only mode.
    clip_frames = sync_frames = None
    # NOTE(review): seq_cfg and net are module-level objects shared by all
    # requests; concurrent calls with different durations would race here.
    seq_cfg.duration = duration
    net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)

    audios = generate(clip_frames,
                      sync_frames, [prompt],
                      negative_text=[negative_prompt],
                      feature_utils=feature_utils,
                      net=net,
                      fm=fm,
                      rng=rng,
                      cfg_strength=cfg_strength)
    audio = audios.float().cpu()[0]

    # Reserve a path and close our handle before torchaudio writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.flac') as tmp:
        audio_save_path = tmp.name
    torchaudio.save(audio_save_path, audio, seq_cfg.sampling_rate)

    rule = '=' * 50
    info_log = "\n".join([
        "✅ TEXT TO AUDIO COMPLETE!",
        rule,
        "🎵 Audio Info:",
        f"• Duration: {duration:.2f} seconds",
        f"• Sample Rate: {seq_cfg.sampling_rate} Hz",
        rule,
        "⚙️ Generation Settings:",
        f"• Seed: {seed}",
        f"• Steps: {num_steps}",
        f"• CFG Strength: {cfg_strength}",
        rule,
        "📝 Prompts:",
        f"• Prompt: {prompt[:40]}{'...' if len(prompt) > 40 else ''}",
        f"• Negative: {negative_prompt[:30]}{'...' if len(negative_prompt) > 30 else ''}",
        rule,
        "💾 Audio ready to download!",
    ])

    return audio_save_path, info_log
|
|
|
|
| |
| |
| |
|
|
| css = """ |
| /* ===== π¨ Google Fonts Import ===== */ |
| @import url('https://fonts.googleapis.com/css2?family=Bangers&family=Comic+Neue:wght@400;700&display=swap'); |
| |
| /* ===== π¨ Comic Classic λ°°κ²½ - λΉν°μ§ νμ΄νΌ + λνΈ ν¨ν΄ ===== */ |
| .gradio-container { |
| background-color: #FEF9C3 !important; |
| background-image: |
| radial-gradient(#1F2937 1px, transparent 1px) !important; |
| background-size: 20px 20px !important; |
| min-height: 100vh !important; |
| font-family: 'Comic Neue', cursive, sans-serif !important; |
| } |
| |
| /* ===== νκΉ
νμ΄μ€ μλ¨ μμ μ¨κΉ ===== */ |
| .huggingface-space-header, |
| #space-header, |
| .space-header, |
| [class*="space-header"], |
| .svelte-1ed2p3z, |
| .space-header-badge, |
| .header-badge, |
| [data-testid="space-header"], |
| .svelte-kqij2n, |
| .svelte-1ax1toq, |
| .embed-container > div:first-child { |
| display: none !important; |
| visibility: hidden !important; |
| height: 0 !important; |
| width: 0 !important; |
| overflow: hidden !important; |
| opacity: 0 !important; |
| pointer-events: none !important; |
| } |
| |
| /* ===== Footer μμ μ¨κΉ ===== */ |
| footer, |
| .footer, |
| .gradio-container footer, |
| .built-with, |
| [class*="footer"], |
| .gradio-footer, |
| .main-footer, |
| div[class*="footer"], |
| .show-api, |
| .built-with-gradio, |
| a[href*="gradio.app"], |
| a[href*="huggingface.co/spaces"] { |
| display: none !important; |
| visibility: hidden !important; |
| height: 0 !important; |
| padding: 0 !important; |
| margin: 0 !important; |
| } |
| |
| /* ===== λ©μΈ 컨ν
μ΄λ ===== */ |
| #col-container { |
| max-width: 1200px; |
| margin: 0 auto; |
| } |
| |
| /* ===== π¨ ν€λ νμ΄ν - μ½λ―Ή μ€νμΌ ===== */ |
| .header-text h1 { |
| font-family: 'Bangers', cursive !important; |
| color: #1F2937 !important; |
| font-size: 3.5rem !important; |
| font-weight: 400 !important; |
| text-align: center !important; |
| margin-bottom: 0.5rem !important; |
| text-shadow: |
| 4px 4px 0px #FACC15, |
| 6px 6px 0px #1F2937 !important; |
| letter-spacing: 3px !important; |
| -webkit-text-stroke: 2px #1F2937 !important; |
| } |
| |
| /* ===== π¨ μλΈνμ΄ν ===== */ |
| .subtitle { |
| text-align: center !important; |
| font-family: 'Comic Neue', cursive !important; |
| font-size: 1.2rem !important; |
| color: #1F2937 !important; |
| margin-bottom: 1.5rem !important; |
| font-weight: 700 !important; |
| } |
| |
| /* ===== π¨ ν μ€νμΌ ===== */ |
| .tabs { |
| background: #FFFFFF !important; |
| border: 3px solid #1F2937 !important; |
| border-radius: 12px !important; |
| box-shadow: 6px 6px 0px #1F2937 !important; |
| padding: 10px !important; |
| } |
| |
| .tab-nav { |
| background: #FACC15 !important; |
| border-radius: 8px !important; |
| padding: 5px !important; |
| border: 2px solid #1F2937 !important; |
| } |
| |
| .tab-nav button { |
| font-family: 'Bangers', cursive !important; |
| font-size: 1.1rem !important; |
| letter-spacing: 1px !important; |
| color: #1F2937 !important; |
| background: transparent !important; |
| border: none !important; |
| padding: 10px 20px !important; |
| border-radius: 6px !important; |
| transition: all 0.2s ease !important; |
| } |
| |
| .tab-nav button:hover { |
| background: #FEF3C7 !important; |
| } |
| |
| .tab-nav button.selected { |
| background: #3B82F6 !important; |
| color: #FFFFFF !important; |
| box-shadow: 3px 3px 0px #1F2937 !important; |
| } |
| |
| /* ===== π¨ μΉ΄λ/ν¨λ - λ§ν νλ μ μ€νμΌ ===== */ |
| .gr-panel, |
| .gr-box, |
| .gr-form, |
| .block, |
| .gr-group { |
| background: #FFFFFF !important; |
| border: 3px solid #1F2937 !important; |
| border-radius: 8px !important; |
| box-shadow: 6px 6px 0px #1F2937 !important; |
| transition: all 0.2s ease !important; |
| } |
| |
| .gr-panel:hover, |
| .block:hover { |
| transform: translate(-2px, -2px) !important; |
| box-shadow: 8px 8px 0px #1F2937 !important; |
| } |
| |
| /* ===== π¨ μ
λ ₯ νλ (Textbox) ===== */ |
| textarea, |
| input[type="text"], |
| input[type="number"] { |
| background: #FFFFFF !important; |
| border: 3px solid #1F2937 !important; |
| border-radius: 8px !important; |
| color: #1F2937 !important; |
| font-family: 'Comic Neue', cursive !important; |
| font-size: 1rem !important; |
| font-weight: 700 !important; |
| transition: all 0.2s ease !important; |
| } |
| |
| textarea:focus, |
| input[type="text"]:focus, |
| input[type="number"]:focus { |
| border-color: #3B82F6 !important; |
| box-shadow: 4px 4px 0px #3B82F6 !important; |
| outline: none !important; |
| } |
| |
| textarea::placeholder { |
| color: #9CA3AF !important; |
| font-weight: 400 !important; |
| } |
| |
| /* ===== π¨ Primary λ²νΌ - μ½λ―Ή λΈλ£¨ ===== */ |
| .gr-button-primary, |
| button.primary, |
| .gr-button.primary { |
| background: #3B82F6 !important; |
| border: 3px solid #1F2937 !important; |
| border-radius: 8px !important; |
| color: #FFFFFF !important; |
| font-family: 'Bangers', cursive !important; |
| font-weight: 400 !important; |
| font-size: 1.3rem !important; |
| letter-spacing: 2px !important; |
| padding: 14px 28px !important; |
| box-shadow: 5px 5px 0px #1F2937 !important; |
| transition: all 0.1s ease !important; |
| text-shadow: 1px 1px 0px #1F2937 !important; |
| } |
| |
| .gr-button-primary:hover, |
| button.primary:hover, |
| .gr-button.primary:hover { |
| background: #2563EB !important; |
| transform: translate(-2px, -2px) !important; |
| box-shadow: 7px 7px 0px #1F2937 !important; |
| } |
| |
| .gr-button-primary:active, |
| button.primary:active, |
| .gr-button.primary:active { |
| transform: translate(3px, 3px) !important; |
| box-shadow: 2px 2px 0px #1F2937 !important; |
| } |
| |
| /* ===== π¨ Secondary λ²νΌ - μ½λ―Ή λ λ ===== */ |
| .gr-button-secondary, |
| button.secondary { |
| background: #EF4444 !important; |
| border: 3px solid #1F2937 !important; |
| border-radius: 8px !important; |
| color: #FFFFFF !important; |
| font-family: 'Bangers', cursive !important; |
| font-weight: 400 !important; |
| font-size: 1.1rem !important; |
| letter-spacing: 1px !important; |
| box-shadow: 4px 4px 0px #1F2937 !important; |
| transition: all 0.1s ease !important; |
| text-shadow: 1px 1px 0px #1F2937 !important; |
| } |
| |
| .gr-button-secondary:hover, |
| button.secondary:hover { |
| background: #DC2626 !important; |
| transform: translate(-2px, -2px) !important; |
| box-shadow: 6px 6px 0px #1F2937 !important; |
| } |
| |
| /* ===== π¨ λ‘κ·Έ μΆλ ₯ μμ ===== */ |
| .info-log textarea { |
| background: #1F2937 !important; |
| color: #10B981 !important; |
| font-family: 'Courier New', monospace !important; |
| font-size: 0.9rem !important; |
| font-weight: 400 !important; |
| border: 3px solid #10B981 !important; |
| border-radius: 8px !important; |
| box-shadow: 4px 4px 0px #10B981 !important; |
| } |
| |
| /* ===== π¨ λΉλμ€/μ€λμ€ μμ ===== */ |
| .gr-video, |
| .gr-audio, |
| video, |
| audio { |
| border: 4px solid #1F2937 !important; |
| border-radius: 8px !important; |
| box-shadow: 8px 8px 0px #1F2937 !important; |
| overflow: hidden !important; |
| background: #FFFFFF !important; |
| } |
| |
| /* ===== π¨ κ°€λ¬λ¦¬ μ€νμΌ ===== */ |
| .gr-gallery { |
| background: #FFFFFF !important; |
| border: 3px solid #1F2937 !important; |
| border-radius: 8px !important; |
| box-shadow: 6px 6px 0px #1F2937 !important; |
| padding: 10px !important; |
| } |
| |
| .gr-gallery .thumbnail-item { |
| border: 3px solid #1F2937 !important; |
| border-radius: 6px !important; |
| transition: all 0.2s ease !important; |
| overflow: hidden !important; |
| } |
| |
| .gr-gallery .thumbnail-item:hover { |
| transform: scale(1.05) !important; |
| box-shadow: 4px 4px 0px #3B82F6 !important; |
| } |
| |
| /* ===== π¨ μ¬λΌμ΄λ μ€νμΌ ===== */ |
| input[type="range"] { |
| accent-color: #3B82F6 !important; |
| } |
| |
| .gr-slider { |
| background: #FFFFFF !important; |
| } |
| |
| /* ===== π¨ λΌλ²¨ μ€νμΌ ===== */ |
| label, |
| .gr-input-label, |
| .gr-block-label { |
| color: #1F2937 !important; |
| font-family: 'Comic Neue', cursive !important; |
| font-weight: 700 !important; |
| font-size: 1rem !important; |
| } |
| |
| span.gr-label { |
| color: #1F2937 !important; |
| } |
| |
| /* ===== π¨ μ 보 ν
μ€νΈ ===== */ |
| .gr-info, |
| .info { |
| color: #6B7280 !important; |
| font-family: 'Comic Neue', cursive !important; |
| font-size: 0.9rem !important; |
| } |
| |
| /* ===== π¨ Number Input μ€νμΌ ===== */ |
| .gr-number input { |
| background: #FFFFFF !important; |
| border: 3px solid #1F2937 !important; |
| border-radius: 8px !important; |
| color: #1F2937 !important; |
| font-family: 'Comic Neue', cursive !important; |
| font-weight: 700 !important; |
| box-shadow: 3px 3px 0px #1F2937 !important; |
| } |
| |
| /* ===== π¨ μ€ν¬λ‘€λ° - μ½λ―Ή μ€νμΌ ===== */ |
| ::-webkit-scrollbar { |
| width: 12px; |
| height: 12px; |
| } |
| |
| ::-webkit-scrollbar-track { |
| background: #FEF9C3; |
| border: 2px solid #1F2937; |
| } |
| |
| ::-webkit-scrollbar-thumb { |
| background: #3B82F6; |
| border: 2px solid #1F2937; |
| border-radius: 0px; |
| } |
| |
| ::-webkit-scrollbar-thumb:hover { |
| background: #EF4444; |
| } |
| |
| /* ===== π¨ μ ν νμ΄λΌμ΄νΈ ===== */ |
| ::selection { |
| background: #FACC15; |
| color: #1F2937; |
| } |
| |
| /* ===== π¨ λ§ν¬ μ€νμΌ ===== */ |
| a { |
| color: #3B82F6 !important; |
| text-decoration: none !important; |
| font-weight: 700 !important; |
| } |
| |
| a:hover { |
| color: #EF4444 !important; |
| } |
| |
| /* ===== π¨ Row/Column κ°κ²© ===== */ |
| .gr-row { |
| gap: 1.5rem !important; |
| } |
| |
| .gr-column { |
| gap: 1rem !important; |
| } |
| |
| /* ===== λ°μν μ‘°μ ===== */ |
| @media (max-width: 768px) { |
| .header-text h1 { |
| font-size: 2.2rem !important; |
| text-shadow: |
| 3px 3px 0px #FACC15, |
| 4px 4px 0px #1F2937 !important; |
| } |
| |
| .gr-button-primary, |
| button.primary { |
| padding: 12px 20px !important; |
| font-size: 1.1rem !important; |
| } |
| |
| .gr-panel, |
| .block { |
| box-shadow: 4px 4px 0px #1F2937 !important; |
| } |
| |
| .tab-nav button { |
| font-size: 0.9rem !important; |
| padding: 8px 12px !important; |
| } |
| } |
| |
| /* ===== π¨ λ€ν¬λͺ¨λ λΉνμ±ν ===== */ |
| @media (prefers-color-scheme: dark) { |
| .gradio-container { |
| background-color: #FEF9C3 !important; |
| } |
| } |
| """ |
|
|
|
|
| |
| with gr.Blocks(fill_height=True, css=css, title="MMAudio Studio") as demo: |
| gr.LoginButton(value="Option: HuggingFace 'Login' for extra GPU quota +", size="sm") |
| |
| gr.HTML(""" |
| <div style="text-align: center; margin: 20px 0 10px 0;"> |
| <a href="https://www.humangen.ai" target="_blank" style="text-decoration: none;"> |
| <img src="https://img.shields.io/static/v1?label=π HOME&message=HUMANGEN.AI&color=0000ff&labelColor=ffcc00&style=for-the-badge" alt="HOME"> |
| </a> |
| </div> |
| """) |
| |
| |
| gr.Markdown( |
| """ |
| # π΅ MMAUDIO STUDIO π¬ |
| """, |
| elem_classes="header-text" |
| ) |
| |
| gr.Markdown( |
| """ |
| <p class="subtitle">π Generate Audio from Text or Video β’ Korean Supported! νκΈμ§μ π°π·</p> |
| """, |
| ) |
| |
| with gr.Tabs(): |
| |
| with gr.TabItem("π Video Search"): |
| gr.Markdown( |
| """ |
| <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;"> |
| πΉ Search for videos from Pixabay to use as input! |
| </p> |
| """ |
| ) |
| |
| with gr.Row(): |
| with gr.Column(scale=1): |
| search_query = gr.Textbox( |
| label="π Search Query (νκΈμ§μ)" if translator else "π Search Query", |
| placeholder="Enter search keywords...", |
| lines=1 |
| ) |
| search_btn = gr.Button( |
| "π SEARCH VIDEOS!", |
| variant="primary", |
| size="lg" |
| ) |
| |
| search_gallery = gr.Gallery( |
| label="πΊ Search Results", |
| columns=4, |
| rows=5, |
| height=500 |
| ) |
| |
| search_btn.click( |
| fn=search_videos, |
| inputs=[search_query], |
| outputs=[search_gallery] |
| ) |
| |
| |
| with gr.TabItem("π¬ Video-to-Audio"): |
| gr.Markdown( |
| """ |
| <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;"> |
| π₯ Upload a video and generate matching audio! |
| </p> |
| """ |
| ) |
| |
| with gr.Row(equal_height=False): |
| with gr.Column(scale=1): |
| v2a_video = gr.Video(label="πΉ Input Video") |
| v2a_prompt = gr.Textbox( |
| label="βοΈ Prompt (νκΈμ§μ)" if translator else "βοΈ Prompt", |
| placeholder="Describe the audio you want...", |
| lines=2 |
| ) |
| v2a_negative = gr.Textbox( |
| label="π« Negative Prompt", |
| value="music", |
| lines=1 |
| ) |
| |
| with gr.Row(): |
| v2a_seed = gr.Number(label="π² Seed", value=0) |
| v2a_steps = gr.Number(label="π Steps", value=25) |
| |
| with gr.Row(): |
| v2a_cfg = gr.Number(label="π― Guidance Scale", value=4.5) |
| v2a_duration = gr.Number(label="β±οΈ Duration (sec)", value=8) |
| |
| v2a_btn = gr.Button( |
| "π¬ GENERATE AUDIO! π", |
| variant="primary", |
| size="lg" |
| ) |
| |
| with gr.Accordion("π Generation Log", open=True): |
| v2a_log = gr.Textbox( |
| label="", |
| placeholder="Upload video and click generate...", |
| lines=12, |
| interactive=False, |
| elem_classes="info-log" |
| ) |
| |
| with gr.Column(scale=1): |
| v2a_output = gr.Video(label="π₯ Generated Result", height=400) |
| gr.Markdown( |
| """ |
| <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;"> |
| π‘ Right-click on the video to save! |
| </p> |
| """ |
| ) |
| |
| v2a_btn.click( |
| fn=video_to_audio, |
| inputs=[v2a_video, v2a_prompt, v2a_negative, v2a_seed, v2a_steps, v2a_cfg, v2a_duration], |
| outputs=[v2a_output, v2a_log] |
| ) |
| |
| |
| with gr.TabItem("π΅ Text-to-Audio"): |
| gr.Markdown( |
| """ |
| <p style="text-align: center; font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937; margin-bottom: 1rem;"> |
| β¨ Generate audio from text description! |
| </p> |
| """ |
| ) |
| |
| with gr.Row(equal_height=False): |
| with gr.Column(scale=1): |
| t2a_prompt = gr.Textbox( |
| label="βοΈ Prompt (νκΈμ§μ)" if translator else "βοΈ Prompt", |
| placeholder="Describe the audio you want to generate...", |
| lines=3 |
| ) |
| t2a_negative = gr.Textbox( |
| label="π« Negative Prompt", |
| placeholder="What to avoid...", |
| lines=1 |
| ) |
| |
| with gr.Row(): |
| t2a_seed = gr.Number(label="π² Seed", value=0) |
| t2a_steps = gr.Number(label="π Steps", value=25) |
| |
| with gr.Row(): |
| t2a_cfg = gr.Number(label="π― Guidance Scale", value=4.5) |
| t2a_duration = gr.Number(label="β±οΈ Duration (sec)", value=8) |
| |
| t2a_btn = gr.Button( |
| "π΅ GENERATE AUDIO! β¨", |
| variant="primary", |
| size="lg" |
| ) |
| |
| with gr.Accordion("π Generation Log", open=True): |
| t2a_log = gr.Textbox( |
| label="", |
| placeholder="Enter prompt and click generate...", |
| lines=12, |
| interactive=False, |
| elem_classes="info-log" |
| ) |
| |
| with gr.Column(scale=1): |
| t2a_output = gr.Audio(label="π Generated Audio") |
| gr.Markdown( |
| """ |
| <p style="text-align: center; margin-top: 15px; font-weight: 700; color: #1F2937;"> |
| π‘ Click the download button to save! |
| </p> |
| """ |
| ) |
| |
| t2a_btn.click( |
| fn=text_to_audio, |
| inputs=[t2a_prompt, t2a_negative, t2a_seed, t2a_steps, t2a_cfg, t2a_duration], |
| outputs=[t2a_output, t2a_log] |
| ) |
|
|
|
|
| |
def _run_app():
    """Warn when translation is unavailable, then start the Gradio server."""
    if translator is None:
        log.warning("Translation model failed to load. Korean translation will be disabled.")

    demo.launch(allowed_paths=[output_dir])


if __name__ == "__main__":
    _run_app()