Upload 2 files

7108f39 verified 9 months ago

7.52 kB

	import json
	import os
	import shutil
	import subprocess
	import sys
	import time
	from datetime import datetime

	import gradio as gr

	from model_list import medley_vox_models
	from utils.download_models import download_model
	from assets.translations import MVSEPLESS_TRANSLATIONS as TRANSLATIONS

	# Pretrained checkpoint stored under separator/medley_vox/pretrained_models.
	# It is fetched once, at import time, when it is not already on disk.
	PRETRAIN_FILE = os.path.join(os.getcwd(), "separator", "medley_vox", "pretrained_models", "xlsr_53_56k.pt")
	if not os.path.exists(PRETRAIN_FILE):
	    # wget does not create missing parent directories for its -O target.
	    os.makedirs(os.path.dirname(PRETRAIN_FILE), exist_ok=True)
	    # Download under a temporary name first: wget -O creates/truncates its
	    # target even when the transfer fails, which would make the existence
	    # check above succeed on the next start with a broken file.
	    _pretrain_tmp = PRETRAIN_FILE + ".part"
	    try:
	        # Argument list instead of a shell string: neither the path nor the
	        # "?" in the URL is subject to shell word splitting or globbing.
	        _pretrain_status = subprocess.run([
	            "wget",
	            "-O",
	            _pretrain_tmp,
	            "https://huggingface.co/Sucial/MedleyVox-Inference-WebUI/resolve/main/pretrained/xlsr_53_56k.pt?download=true",
	        ]).returncode
	    except OSError as exc:
	        # wget is missing or cannot be executed; keep importing like the
	        # original os.system() call did, but say why the file is absent.
	        print(f"Could not run wget to fetch {PRETRAIN_FILE}: {exc}")
	        _pretrain_status = -1
	    if _pretrain_status == 0:
	        os.replace(_pretrain_tmp, PRETRAIN_FILE)
	    else:
	        if os.path.exists(_pretrain_tmp):
	            os.remove(_pretrain_tmp)
	        print(f"Download of {PRETRAIN_FILE} failed (wget exit status {_pretrain_status})")

	# Interface language code read by t(); replaced through set_language().
	CURRENT_LANG = "ru"
	# Directory handed to download_model() as the model cache location,
	# resolved against the current working directory.
	MODELS_CACHE_DIR = os.path.join(os.getcwd(), "separator", "models_cache")
	# Output formats known to this module.
	OUTPUT_FORMATS = [
	    "mp3",
	    "wav",
	    "flac",
	    "ogg",
	    "opus",
	    "m4a",
	    "aac",
	    "aiff",
	]
	# Base directory under which each run gets its own timestamped sub-directory.
	OUTPUT_DIR = "/content/output"

	def set_language(lang):
	    """Store ``lang`` as the module-wide current language code.

	    Args:
	        lang: Language code that later lookups in ``t`` will use.
	    """
	    # Rebinds the module-level CURRENT_LANG, exactly like a ``global`` assignment.
	    globals()["CURRENT_LANG"] = lang

	def t(key, **kwargs):
	    """Return the translation of ``key`` for the current language.

	    Args:
	        key: Translation identifier.
	        **kwargs: Values substituted into the translated text with
	            ``str.format``.

	    Returns:
	        The translated text, or ``key`` itself when the current language or
	        the key has no entry. The text is formatted with ``kwargs`` only
	        when at least one keyword argument is given.
	    """
	    # An unknown language code must not break the caller: fall back to the
	    # raw key, the same way an unknown key is already handled.
	    table = TRANSLATIONS.get(CURRENT_LANG) or {}
	    translation = table.get(key, key)
	    return translation.format(**kwargs) if kwargs else translation

	def medley_voxer(input, output, model_name, output_format, stereo_mode):
	    """Run one Medley-Vox separation and return the stems it reports.

	    The model files are obtained through ``download_model`` and the
	    ``separator.medley_vox.svs.inference`` module is executed as a child
	    process; its ``results.json`` is then read back from ``output``.

	    Args:
	        input: Path passed to the inference script as ``--inference_data_dir``.
	        output: Directory passed as ``--results_save_dir``.
	        model_name: Key of ``medley_vox_models``; also passed as ``--exp_name``.
	        output_format: Value passed as ``--output_format``.
	        stereo_mode: Value passed as ``--stereo``.

	    Returns:
	        The parsed content of ``<output>/results.json`` (callers in this
	        module iterate it as ``(stem, file)`` pairs). An empty list is
	        returned when there is no input, when the child process cannot be
	        started or exits with a non-zero status, or when no results file
	        exists.

	    Raises:
	        KeyError: If ``model_name`` is not a key of ``medley_vox_models``.
	    """
	    if not input:
	        # Nothing was supplied; do not run the script on the text "None".
	        return []

	    model_info = medley_vox_models[model_name]
	    medley_vox_model_dir = download_model(
	        MODELS_CACHE_DIR,
	        model_name,
	        "medley_vox",
	        model_info["checkpoint_url"],
	        model_info["config_url"],
	    )

	    # An argument list (no shell) keeps quotes, spaces and shell
	    # metacharacters in file names from breaking or altering the command.
	    command = [
	        # Same interpreter as this process, so the child sees the same packages.
	        sys.executable or "python",
	        "-m",
	        "separator.medley_vox.svs.inference",
	        "--inference_data_dir",
	        str(input),
	        "--results_save_dir",
	        str(output),
	        "--model_dir",
	        str(medley_vox_model_dir),
	        "--exp_name",
	        str(model_name),
	        "--use_overlapadd=ola",
	        "--stereo",
	        str(stereo_mode),
	        "--output_format",
	        str(output_format),
	    ]
	    try:
	        exit_status = subprocess.run(command).returncode
	    except OSError as exc:
	        print(f"Medley-Vox inference could not be started: {exc}")
	        return []
	    if exit_status != 0:
	        # The output directory may be reused between calls; a results.json
	        # found after a failed run would belong to an earlier call.
	        print(f"Medley-Vox inference failed with exit status {exit_status}")
	        return []

	    results_path = os.path.join(output, "results.json")
	    if not os.path.exists(results_path):
	        return []
	    with open(results_path) as f:
	        return json.load(f)

	def medley_voxer_gradio(input, output, model_name, output_format, stereo_mode):
	    """Run ``medley_voxer`` and turn its result into updates for two audio players.

	    Args:
	        input: Forwarded to ``medley_voxer``.
	        output: Forwarded to ``medley_voxer``.
	        model_name: Forwarded to ``medley_voxer``.
	        output_format: Forwarded to ``medley_voxer``.
	        stereo_mode: Forwarded to ``medley_voxer``.

	    Returns:
	        A tuple of exactly two ``gr.update`` values. Each of the first two
	        reported ``(stem, file)`` pairs becomes a visible player labelled
	        with the stem name; missing entries become hidden, empty players.
	    """
	    # Two audio components are wired as outputs of this callback, so exactly
	    # this many updates must be returned even when fewer stems were produced.
	    slot_count = 2
	    output_audio = medley_voxer(input, output, model_name, output_format, stereo_mode) or []
	    results = [
	        gr.update(visible=True, label=stem, value=output_file)
	        for stem, output_file in output_audio[:slot_count]
	    ]
	    # Pad with hidden players, the same way multi_voxer_gradio fills its slots.
	    results.extend(
	        gr.update(visible=False, label=None, value=None)
	        for _ in range(slot_count - len(results))
	    )
	    return tuple(results)


	##############


	def multi_voxer(input, output, model_name, output_format, stereo_mode, stems):
	    """Split a recording into ``stems`` parts by repeated ``medley_voxer`` runs.

	    The input is separated once; for more than two stems every resulting
	    file is separated again, and so on, until the requested count is
	    reached. Files are processed depth-first, in the order in which
	    ``medley_voxer`` reports them.

	    Args:
	        input: Path of the recording to separate.
	        output: Results directory, shared by every separation run.
	        model_name: Forwarded to ``medley_voxer``.
	        output_format: Forwarded to ``medley_voxer``.
	        stereo_mode: Forwarded to ``medley_voxer``.
	        stems: Requested number of stems; a power of two, at least 2
	            (2, 4, 8, 16, ...).

	    Returns:
	        The concatenated results of the last separation round, or ``None``
	        when ``stems`` is not a supported count (no separation is run then).
	    """
	    try:
	        stem_count = int(stems)
	    except (TypeError, ValueError, OverflowError):
	        return None
	    # Each round doubles the number of parts, so only powers of two are
	    # reachable. ``stem_count != stems`` rejects values such as "4" or 2.5,
	    # which never compared equal to a supported count.
	    if stem_count != stems or stem_count < 2 or stem_count & (stem_count - 1):
	        return None
	    rounds = stem_count.bit_length() - 1

	    def _separate(path, rounds_left):
	        # Separate ``path`` once, then recurse into each produced file.
	        parts = medley_voxer(path, output, model_name, output_format, stereo_mode)
	        if rounds_left == 1:
	            return parts
	        leaves = []
	        for _stem, part_file in parts:
	            leaves.extend(_separate(part_file, rounds_left - 1))
	        return leaves

	    return _separate(input, rounds)


	##############

	def multi_voxer_gradio(input, output, model_name, output_format, stereo_mode, stems):
	    """Run ``multi_voxer`` and turn its result into updates for twenty audio players.

	    Returns:
	        A tuple of twenty ``gr.update`` values: one visible, labelled player
	        per ``(stem, file)`` pair (at most the first twenty), followed by
	        hidden, empty players for the unused slots.
	    """
	    separated = multi_voxer(input, output, model_name, output_format, stereo_mode, stems)

	    if separated is None:
	        shown = []
	    else:
	        shown = [
	            gr.update(visible=True, label=stem, value=output_file)
	            for stem, output_file in separated[:20]
	        ]

	    # Fill the remaining slots with invisible elements.
	    hidden = [
	        gr.update(visible=False, label=None, value=None)
	        for _ in range(20 - len(shown))
	    ]
	    return tuple(shown + hidden)

	def medley_vox_plugin_name():
	    """Return the name string of this plugin."""
	    plugin_name = "Medley-Vox"
	    return plugin_name

	def medley_vox_plugin(lang):
	"""Create the Gradio components of the Medley-Vox plugin and wire their click handlers.

	Two tabs are created: one for a single separation (two audio outputs) and one
	for repeated separation into 2, 4, 8 or 16 stems (twenty audio outputs).

	Args:
	    lang: Language code stored with set_language() before any label is translated.
	"""
	# NOTE(review): the indentation of this block was lost in this copy of the file;
	# the nesting of the `with` layout blocks below must be restored before it can run.
	# Labels are looked up through t(), so the language has to be set first.
	set_language(lang)
	# Hidden text component holding the output directory; each click handler below
	# overwrites it with a timestamped path before the separation runs.
	output_dir = gr.Text(value="/content/output/", visible=False)
	# Tab 1: single separation.
	with gr.Tab(t("inference")):
	with gr.Row(equal_height=True):
	with gr.Column():
	input_voice = gr.Audio(show_label=False, type="filepath", interactive=True)
	with gr.Column():
	# The first key of medley_vox_models is preselected.
	vox_model_name = gr.Dropdown(label=t("vox_model_name"), choices=list(medley_vox_models.keys()), value=list(medley_vox_models.keys())[0], interactive=True, filterable=False)
	stereo_mode = gr.Dropdown(label=t("vox_stereo_mode"), choices=["mono", "full"], value="mono", interactive=True, filterable=False)
	# Every entry of OUTPUT_FORMATS except "ogg" is offered.
	output_vox_format = gr.Dropdown(label=t("vox_output_format"), choices=list(filter(lambda fmt: fmt != "ogg", OUTPUT_FORMATS)), value="mp3", interactive=True, filterable=False)
	separate_vox_btn = gr.Button(t("separate_vocals_btn"), variant="primary")
	# Two result players; only the first one is visible initially.
	output_voxes = [gr.Audio(visible=(i == 0), interactive=False, type="filepath", show_download_button=True) for i in range(2)]

	# Tab 2: repeated separation into several stems.
	with gr.Tab(t("vocal_multi_separation")):
	with gr.Row(equal_height=True):
	with gr.Column():
	input_vox = gr.Audio(show_label=False, type="filepath", interactive=True)
	with gr.Column():
	vox_m_model_name = gr.Dropdown(label=t("vox_model_name"), choices=list(medley_vox_models.keys()), value=list(medley_vox_models.keys())[0], interactive=True, filterable=False)
	with gr.Row():
	stereo_m_mode = gr.Dropdown(label=t("vox_stereo_mode"), choices=["mono", "full"], value="mono", interactive=True, filterable=False)
	# Stem counts offered to the user; the selected value is passed to multi_voxer_gradio as `stems`.
	count_stems = gr.Dropdown(label=t("vox_count_stems"), choices=[2, 4, 8, 16], value=2, interactive=True, filterable=False)
	output_m_vox_format = gr.Dropdown(label=t("vox_output_format"), choices=list(filter(lambda fmt: fmt != "ogg", OUTPUT_FORMATS)), value="mp3", interactive=True, filterable=False)
	separate_m_vox_btn = gr.Button(t("vox_multi_separate_btn"), variant="primary")
	# Twenty result players; only the first one is visible initially.
	output_m_voxes = [gr.Audio(visible=(i == 0), interactive=False, type="filepath", show_download_button=True) for i in range(20)]

	# Two-step chain: first write OUTPUT_DIR/<YYYYmmdd_HHMMSS> into output_dir,
	# then run multi_voxer_gradio with that directory and fill the twenty players.
	separate_m_vox_btn.click(fn=(lambda : os.path.join(OUTPUT_DIR, datetime.now().strftime("%Y%m%d_%H%M%S"))), inputs=None, outputs=output_dir).then(fn=multi_voxer_gradio, inputs=[input_vox, output_dir, vox_m_model_name, output_m_vox_format, stereo_m_mode, count_stems], outputs=[*output_m_voxes])

	# Same chain for the single separation: medley_voxer_gradio fills the two players.
	separate_vox_btn.click(fn=(lambda : os.path.join(OUTPUT_DIR, datetime.now().strftime("%Y%m%d_%H%M%S"))), inputs=None, outputs=output_dir).then(fn=medley_voxer_gradio, inputs=[input_voice, output_dir, vox_model_name, output_vox_format, stereo_mode], outputs=output_voxes)