import os
import tempfile

import gradio as gr
import torch
import torchaudio
from audiocraft.models import MusicGen
from bark import SAMPLE_RATE as BARK_SAMPLE_RATE, generate_audio as bark_generate_audio
from pydub import AudioSegment
|
|
| |
# Load the pretrained MusicGen checkpoint once at import time so that every
# request handled by the app reuses the same model object.
_MUSICGEN_CHECKPOINT = 'facebook/musicgen-small'
musicgen = MusicGen.get_pretrained(_MUSICGEN_CHECKPOINT)
|
|
| |
def generate_song(lyrics, genre_prompt):
    """Generate a short song by overlaying Bark vocals on a MusicGen track.

    Args:
        lyrics: Text for Bark to vocalise. Must contain at least one
            non-whitespace character.
        genre_prompt: Free-text description of the backing track, passed to
            MusicGen as its single prompt.

    Returns:
        Path to a WAV file containing the mix. The file is deliberately left
        on disk so the UI can serve it after this function returns.

    Raises:
        ValueError: If ``lyrics`` is empty or whitespace-only.
    """
    # Fail fast: both models are slow, so reject unusable input before
    # running either of them.
    if not lyrics or not lyrics.strip():
        raise ValueError("lyrics must not be empty")

    # Bark's generate_audio returns a NumPy array (per its documentation),
    # not a tensor, so wrap it and add the leading channel axis that
    # torchaudio.save expects.
    vocals = bark_generate_audio(lyrics, history_prompt="v2/en_speaker_6")
    vocals_wave = torch.as_tensor(vocals, dtype=torch.float32).reshape(1, -1)

    musicgen.set_generation_params(duration=15)
    # One prompt in, so the track we want is the first item of the batch.
    music_wave = musicgen.generate([genre_prompt])[0].detach().cpu().float()
    # Ask the model for its output rate instead of hard-coding it.
    music_rate = musicgen.sample_rate

    # The stems are only needed long enough for pydub to read them back into
    # memory; a TemporaryDirectory avoids the race-prone, deprecated
    # tempfile.mktemp and removes the files afterwards.
    with tempfile.TemporaryDirectory() as workdir:
        vocals_path = os.path.join(workdir, "vocals.wav")
        music_path = os.path.join(workdir, "music.wav")

        # Write 16-bit PCM so pydub's built-in WAV reader can parse the files
        # without needing ffmpeg; clamp first so out-of-range samples do not
        # wrap around during the integer conversion.
        torchaudio.save(
            vocals_path,
            vocals_wave.clamp(-1.0, 1.0),
            BARK_SAMPLE_RATE,
            encoding="PCM_S",
            bits_per_sample=16,
        )
        torchaudio.save(
            music_path,
            music_wave.clamp(-1.0, 1.0),
            music_rate,
            encoding="PCM_S",
            bits_per_sample=16,
        )

        vocals_seg = AudioSegment.from_wav(vocals_path)
        music_seg = AudioSegment.from_wav(music_path)

    # overlay() keeps the length of the base segment, so the backing track
    # determines the duration of the mix.
    mixed = music_seg.overlay(
        vocals_seg.set_frame_rate(music_rate).set_channels(1)
    )

    # Reserve the output name securely, then close the handle before pydub
    # writes to it (an open handle would block the write on Windows).
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        output_path = out_file.name
    mixed.export(output_path, format="wav")

    return output_path
|
|
| |
# Build the UI components first, then wire them to generate_song: two text
# inputs (lyrics, genre description) and one audio output.
_lyrics_box = gr.Textbox(label="Enter Lyrics", lines=4)
_genre_box = gr.Textbox(label="Enter Genre (e.g., 'hip-hop with 808s')")
_song_player = gr.Audio(label="Generated Song")

iface = gr.Interface(
    fn=generate_song,
    inputs=[_lyrics_box, _genre_box],
    outputs=_song_player,
)
|
|
| |
if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script, so that
    # importing the module does not block on a running web server.
    iface.launch()