import json
import os
import shutil
import subprocess
import sys
import time
from datetime import datetime

import gradio as gr

from assets.translations import MVSEPLESS_TRANSLATIONS as TRANSLATIONS
from model_list import medley_vox_models
from utils.download_models import download_model
|
|
# Location of the xlsr_53_56k.pt checkpoint under the current working directory;
# it is downloaded once at import time when it is not there yet.
PRETRAIN_FILE = os.path.join(
    os.getcwd(), "separator", "medley_vox", "pretrained_models", "xlsr_53_56k.pt"
)
if not os.path.exists(PRETRAIN_FILE):
    _pretrain_url = "https://huggingface.co/Sucial/MedleyVox-Inference-WebUI/resolve/main/pretrained/xlsr_53_56k.pt?download=true"
    # Download to a temporary name first so an interrupted transfer cannot
    # leave a truncated file that the existence check above would accept on
    # the next import.
    _pretrain_partial = PRETRAIN_FILE + ".part"
    try:
        os.makedirs(os.path.dirname(PRETRAIN_FILE), exist_ok=True)
        # An argument list (no shell) keeps the "?" in the URL and any spaces
        # in the path from being interpreted by a shell.
        _pretrain_ok = (
            subprocess.run(["wget", "-O", _pretrain_partial, _pretrain_url]).returncode == 0
        )
    except OSError:
        # wget is not installed or the directory is not writable; stay
        # best-effort like the original and let the import continue.
        _pretrain_ok = False
    if _pretrain_ok:
        os.replace(_pretrain_partial, PRETRAIN_FILE)
    else:
        print(f"Failed to download {_pretrain_url} to {PRETRAIN_FILE}", file=sys.stderr)
        if os.path.exists(_pretrain_partial):
            os.remove(_pretrain_partial)
|
|
# Language code that t() reads; set_language() replaces it.
CURRENT_LANG = "ru"

# Directory passed to download_model() as the model cache location.
MODELS_CACHE_DIR = os.path.join(os.getcwd(), "separator", "models_cache")

# Output format names known to this module (the UI dropdowns leave out "ogg").
OUTPUT_FORMATS = [
    "mp3",
    "wav",
    "flac",
    "ogg",
    "opus",
    "m4a",
    "aac",
    "aiff",
]

# Base directory under which the click handlers create timestamped folders.
OUTPUT_DIR = "/content/output"
|
|
def set_language(lang):
    """Store ``lang`` as the module-wide language code read by ``t``."""
    # Rebinding through the module namespace is equivalent to a ``global``
    # declaration followed by an assignment.
    globals()["CURRENT_LANG"] = lang
|
|
def t(key, **kwargs):
    """Return the translation for ``key`` with optional value substitution.

    The string is looked up in ``TRANSLATIONS`` under ``CURRENT_LANG``. When
    either the language or the key is unknown, ``key`` itself is used, so a
    bad language code degrades to untranslated labels instead of raising.

    Args:
        key: Translation identifier.
        **kwargs: Values substituted into the string with ``str.format``;
            when none are given the string is returned unformatted.

    Returns:
        The translated (and, if ``kwargs`` were given, formatted) string.

    Raises:
        KeyError: If the string has a named placeholder that is missing
            from ``kwargs`` (raised by ``str.format``).
    """
    try:
        table = TRANSLATIONS[CURRENT_LANG]
    except KeyError:
        # Unknown language: behave like a missing key rather than crash.
        table = {}
    translation = table.get(key, key)
    return translation.format(**kwargs) if kwargs else translation
|
|
def medley_voxer(input, output, model_name, output_format, stereo_mode):
    """Run Medley-Vox inference on one input and return the reported results.

    The model files are obtained through ``download_model``, then
    ``separator.medley_vox.svs.inference`` is run as a child process and the
    ``results.json`` found in ``output`` afterwards is parsed.

    Args:
        input: Path passed to the inference module as ``--inference_data_dir``.
        output: Directory passed as ``--results_save_dir``; ``results.json``
            is read from it.
        model_name: Key into ``medley_vox_models``; also passed as
            ``--exp_name``.
        output_format: Value passed as ``--output_format``.
        stereo_mode: Value passed as ``--stereo``.

    Returns:
        The parsed content of ``results.json`` (the callers in this file
        unpack it as a sequence of ``(stem, file)`` pairs), or ``[]`` when no
        input was given or the file was not produced.

    Raises:
        KeyError: If ``model_name`` is not in ``medley_vox_models``.
    """
    if not input:
        # Nothing to separate (e.g. no audio supplied); skip the model
        # download and the inference run.
        return []

    model_info = medley_vox_models[model_name]
    config_url = model_info["config_url"]
    checkpoint_url = model_info["checkpoint_url"]
    medley_vox_model_dir = download_model(
        MODELS_CACHE_DIR, model_name, "medley_vox", checkpoint_url, config_url
    )

    results_path = os.path.join(output, "results.json")
    # A results.json left by an earlier call into the same directory would
    # otherwise be mistaken for this run's output if inference fails.
    if os.path.exists(results_path):
        os.remove(results_path)

    # An argument list (no shell) so that quotes, spaces or shell
    # metacharacters in paths and names cannot break or alter the command.
    command = [
        sys.executable or "python",
        "-m", "separator.medley_vox.svs.inference",
        "--inference_data_dir", str(input),
        "--results_save_dir", str(output),
        "--model_dir", str(medley_vox_model_dir),
        "--exp_name", str(model_name),
        "--use_overlapadd=ola",
        "--stereo", str(stereo_mode),
        "--output_format", str(output_format),
    ]
    subprocess.run(command)

    if not os.path.exists(results_path):
        return []
    with open(results_path, encoding="utf-8") as results_file:
        return json.load(results_file)
|
|
def medley_voxer_gradio(input, output, model_name, output_format, stereo_mode):
    """Run ``medley_voxer`` and convert its result into two Gradio updates.

    The click handler in this file binds this function to two audio outputs,
    so exactly two updates are always returned: one visible update per
    separated stem (at most two), followed by hidden, empty updates for any
    slot that has no stem. This mirrors the padding done by
    ``multi_voxer_gradio``.

    Args:
        input: Forwarded to ``medley_voxer``.
        output: Forwarded to ``medley_voxer``.
        model_name: Forwarded to ``medley_voxer``.
        output_format: Forwarded to ``medley_voxer``.
        stereo_mode: Forwarded to ``medley_voxer``.

    Returns:
        A tuple of two ``gr.update`` values.
    """
    slot_count = 2
    output_audio = medley_voxer(input, output, model_name, output_format, stereo_mode)
    results = [
        gr.update(visible=True, label=stem, value=output_file)
        for stem, output_file in (output_audio or [])[:slot_count]
    ]
    # Fill the unused slots so the number of returned values always matches
    # the number of bound output components, even when separation failed.
    results.extend(
        gr.update(visible=False, label=None, value=None)
        for _ in range(slot_count - len(results))
    )
    return tuple(results)
|
|
|
|
| |
| |
| |
def _separation_depth(stems):
    """Return how many separation rounds give ``stems`` outputs, or 0 if unsupported."""
    try:
        count = int(stems)
    except (TypeError, ValueError, OverflowError):
        return 0
    # Only whole powers of two (2, 4, 8, 16, ...) can be reached by
    # repeatedly splitting every result in two.
    if count != stems or count < 2 or count & (count - 1):
        return 0
    return count.bit_length() - 1


def _split_to_depth(path, depth, output, model_name, output_format, stereo_mode):
    """Separate ``path``, then re-separate each produced file until ``depth`` rounds are done."""
    voxes = medley_voxer(path, output, model_name, output_format, stereo_mode)
    if depth <= 1:
        return voxes
    leaves = []
    # Depth-first: each produced file is split completely before the next
    # one is touched.
    for _stem, file in voxes:
        leaves.extend(
            _split_to_depth(file, depth - 1, output, model_name, output_format, stereo_mode)
        )
    return leaves


def multi_voxer(input, output, model_name, output_format, stereo_mode, stems):
    """Repeatedly separate ``input`` until ``stems`` results are expected.

    The input is separated with ``medley_voxer``; for more than two stems
    every produced file is separated again, round after round. All rounds
    write into the same ``output`` directory.

    Args:
        input: Path of the audio to separate.
        output: Directory forwarded to every ``medley_voxer`` call.
        model_name: Forwarded to ``medley_voxer``.
        output_format: Forwarded to ``medley_voxer``.
        stereo_mode: Forwarded to ``medley_voxer``.
        stems: Requested number of results; must be a power of two that is
            at least 2 (2, 4, 8, 16, ...).

    Returns:
        For ``stems == 2`` the ``medley_voxer`` result as is; otherwise a
        list with the results of the final round, in the order they were
        produced. ``[]`` when ``stems`` is not supported (no separation is
        run in that case).
    """
    depth = _separation_depth(stems)
    if depth == 0:
        return []
    results = _split_to_depth(input, depth, output, model_name, output_format, stereo_mode)
    if depth > 1:
        # Kept from the original multi-round branches: log the final list.
        print(results)
    return results
|
|
|
|
| |
|
|
def multi_voxer_gradio(input, output, model_name, output_format, stereo_mode, stems):
    """Run ``multi_voxer`` and convert its result into twenty Gradio updates.

    Each of the first twenty ``(stem, file)`` results becomes a visible
    update labelled with the stem; the remaining slots, if any, get hidden
    and empty updates.

    Args:
        input: Forwarded to ``multi_voxer``.
        output: Forwarded to ``multi_voxer``.
        model_name: Forwarded to ``multi_voxer``.
        output_format: Forwarded to ``multi_voxer``.
        stereo_mode: Forwarded to ``multi_voxer``.
        stems: Forwarded to ``multi_voxer``.

    Returns:
        A tuple of exactly twenty ``gr.update`` values.
    """
    slot_count = 20
    separated = multi_voxer(input, output, model_name, output_format, stereo_mode, stems)

    if separated is None:
        shown = []
    else:
        shown = [
            gr.update(visible=True, label=stem, value=output_file)
            for stem, output_file in separated[:slot_count]
        ]

    # Every slot without a result is hidden and cleared.
    hidden = [
        gr.update(visible=False, label=None, value=None)
        for _ in range(slot_count - len(shown))
    ]
    return tuple(shown + hidden)
|
|
def medley_vox_plugin_name():
    """Return the name of this plugin."""
    plugin_name = "Medley-Vox"
    return plugin_name
|
|
def medley_vox_plugin(lang):
    """Create the Medley-Vox Gradio components and wire their click handlers.

    Two tabs are created: one that separates an input once
    (``medley_voxer_gradio``, two audio outputs) and one that separates it
    repeatedly (``multi_voxer_gradio``, twenty audio outputs). Each button
    click first writes a timestamped directory under ``OUTPUT_DIR`` into a
    hidden text component and then runs the separation with that directory.

    Args:
        lang: Language code passed to ``set_language`` before labels are
            translated.
    """
    # NOTE(review): presumably called inside an active gr.Blocks context;
    # the exact nesting of the layout containers below is inferred, confirm
    # against the rendered UI.
    set_language(lang)

    model_names = list(medley_vox_models)
    # "ogg" is not offered in either format dropdown.
    format_choices = [fmt for fmt in OUTPUT_FORMATS if fmt != "ogg"]

    # Hidden holder for the per-run output directory set by the click handlers.
    output_dir = gr.Text(value="/content/output/", visible=False)

    with gr.Tab(t("inference")):
        with gr.Row(equal_height=True):
            with gr.Column():
                input_voice = gr.Audio(show_label=False, type="filepath", interactive=True)
            with gr.Column():
                vox_model_name = gr.Dropdown(
                    label=t("vox_model_name"),
                    choices=model_names,
                    value=model_names[0],
                    interactive=True,
                    filterable=False,
                )
                stereo_mode = gr.Dropdown(
                    label=t("vox_stereo_mode"),
                    choices=["mono", "full"],
                    value="mono",
                    interactive=True,
                    filterable=False,
                )
                output_vox_format = gr.Dropdown(
                    label=t("vox_output_format"),
                    choices=format_choices,
                    value="mp3",
                    interactive=True,
                    filterable=False,
                )
        separate_vox_btn = gr.Button(t("separate_vocals_btn"), variant="primary")
        # Only the first player is visible until results arrive.
        output_voxes = [
            gr.Audio(
                visible=(slot == 0),
                interactive=False,
                type="filepath",
                show_download_button=True,
            )
            for slot in range(2)
        ]

    with gr.Tab(t("vocal_multi_separation")):
        with gr.Row(equal_height=True):
            with gr.Column():
                input_vox = gr.Audio(show_label=False, type="filepath", interactive=True)
            with gr.Column():
                vox_m_model_name = gr.Dropdown(
                    label=t("vox_model_name"),
                    choices=model_names,
                    value=model_names[0],
                    interactive=True,
                    filterable=False,
                )
                with gr.Row():
                    stereo_m_mode = gr.Dropdown(
                        label=t("vox_stereo_mode"),
                        choices=["mono", "full"],
                        value="mono",
                        interactive=True,
                        filterable=False,
                    )
                    count_stems = gr.Dropdown(
                        label=t("vox_count_stems"),
                        choices=[2, 4, 8, 16],
                        value=2,
                        interactive=True,
                        filterable=False,
                    )
                output_m_vox_format = gr.Dropdown(
                    label=t("vox_output_format"),
                    choices=format_choices,
                    value="mp3",
                    interactive=True,
                    filterable=False,
                )
        separate_m_vox_btn = gr.Button(t("vox_multi_separate_btn"), variant="primary")
        output_m_voxes = [
            gr.Audio(
                visible=(slot == 0),
                interactive=False,
                type="filepath",
                show_download_button=True,
            )
            for slot in range(20)
        ]

    # Step 1 picks a fresh timestamped directory, step 2 runs the separation.
    separate_m_vox_btn.click(
        fn=(lambda: os.path.join(OUTPUT_DIR, datetime.now().strftime("%Y%m%d_%H%M%S"))),
        inputs=None,
        outputs=output_dir,
    ).then(
        fn=multi_voxer_gradio,
        inputs=[
            input_vox,
            output_dir,
            vox_m_model_name,
            output_m_vox_format,
            stereo_m_mode,
            count_stems,
        ],
        outputs=list(output_m_voxes),
    )

    separate_vox_btn.click(
        fn=(lambda: os.path.join(OUTPUT_DIR, datetime.now().strftime("%Y%m%d_%H%M%S"))),
        inputs=None,
        outputs=output_dir,
    ).then(
        fn=medley_voxer_gradio,
        inputs=[input_voice, output_dir, vox_model_name, output_vox_format, stereo_mode],
        outputs=output_voxes,
    )
|
|
|
|