import os
import subprocess
import sys
import tempfile

import gradio as gr
import librosa
import numpy as np
import soundfile as sf
from pedalboard import Pedalboard, Compressor, Reverb, Delay, NoiseGate, Chorus
from pedalboard.io import AudioFile
from pydub import AudioSegment

from model_list import models_data as mvsepless_models
from multi_inference import single_multi_inference
from separator.audio_writer import write_audio_file
|
|
| RVC_MODELS_DIR = os.path.join(os.getcwd(), "voice_models") |
| OUTPUT_FORMAT = ["mp3", "wav", "flac", "aiff", "m4a", "aac", "ogg", "opus"] |
|
|
| |
| saved_processing_data = None |
|
|
| |
| TRANSLATIONS = { |
| "ru": { |
| |
| "model_name_label": "Имя модели", |
| "update_button": "Обновить", |
| "input_audio_label": "Входная песня", |
| "generate_btn": "Сгенерировать кавер", |
| "remix_btn": "Пересвести кавер", |
| "final_result": "Финальный результат", |
| "intermediate_files": "Промежуточные файлы", |
| "status_label": "Статус", |
| "tab_separation": "Разделение", |
| "tab_voice_settings": "Настройки преобразования голоса", |
| "tab_mixing_settings": "Настройки сведения аудио", |
| |
| |
| "preclear_vocals": "Очистить вокал от реверба/эха", |
| "split_vocals": "Разделить вокал на лид/бэк-вокалы", |
| "vocal_model": "Вокальная модель", |
| "dereverb_model": "Dereverb/Deecho модель", |
| "karaoke_model": "Караоке модель", |
| |
| |
| "conversion_mode": "Режим преобразования", |
| "conversion_info": "lead - только основной вокал\nback - только бэк-вокал\nlead/back - основной и бэк-вокалы\nfull - весь вокал", |
| "vocal_pitch": "Высота тона вокала", |
| "backing_pitch": "Высота тона бэк-вокала", |
| "pitch_method": "Метод извлечения тона", |
| "max_pitch": "Верхний лимит определения высоты тона", |
| "index_rate": "Влияние индекса", |
| "filter_radius": "Радиус фильтра", |
| "rms_envelope": "Огибающая громкости", |
| "protect_cons": "Защита согласных", |
| "hop_length": "Длина шага", |
| |
| |
| "volume_adjust": "Изменение громкости", |
| "vocals_gain": "Вокал", |
| "backing_gain": "Бэк-вокал", |
| "inst_gain": "Инструментал", |
| "output_format": "Формат вывода", |
| "add_unconv": "Добавить к инструменталу непреобразованный вокал", |
| "add_effects": "Добавить эффекты на голос", |
| |
| |
| "effects_tab": "Эффекты", |
| "echo_tab": "Эхо", |
| "echo_delay": "Время задержки (сек)", |
| "echo_feedback": "Обратная связь", |
| "echo_mix": "Смешение", |
| "reverb_tab": "Реверберация", |
| "reverb_size": "Размер комнаты", |
| "reverb_width": "Ширина реверберации", |
| "reverb_wet": "Уровень влажности", |
| "reverb_dry": "Уровень сухости", |
| "reverb_damping": "Уровень демпфирования", |
| "chorus_tab": "Хорус", |
| "chorus_rate": "Скорость хоруса", |
| "chorus_depth": "Глубина хоруса", |
| "chorus_delay": "Задержка центра (мс)", |
| "chorus_feedback": "Обратная связь", |
| "chorus_mix": "Смешение", |
| |
| |
| "processing_tab": "Обработка", |
| "compressor_tab": "Компрессор", |
| "comp_ratio": "Соотношение", |
| "comp_threshold": "Порог", |
| "comp_attack": "Время атаки (мс)", |
| "comp_release": "Время спада (мс)", |
| "noise_gate_tab": "Подавление шума", |
| "gate_threshold": "Порог", |
| "gate_ratio": "Соотношение", |
| "gate_attack": "Время атаки (мс)", |
| "gate_release": "Время спада (мс)", |
| |
| |
| "start_processing": "Начало обработки...", |
| "separation": "Разделение на музыку и вокал...", |
| "extracting": "Извлечение лид/бэк-вокала...", |
| "cleaning": "Очистка вокалов...", |
| "converting": "Преобразование вокалов...", |
| "mixing": "Сведение итогового кавера...", |
| |
| |
| "error_audio_model": "Сначала загрузите аудио и выберите модель", |
| "error_audio": "Сначала загрузите аудио", |
| "error_model": "Сначала выберите модель", |
| "error_generate_first": "Сначала сгенерируйте кавер хотя бы один раз!", |
| |
| "vbach_required": "Vbach (RVC) не установлен. В блокноте запустите ячейку 'Установка' с флагом install_vbach [✓]" |
| }, |
| "en": { |
| |
| "model_name_label": "Model name", |
| "update_button": "Update", |
| "input_audio_label": "Input song", |
| "generate_btn": "Generate cover", |
| "remix_btn": "Remix cover", |
| "final_result": "Final result", |
| "intermediate_files": "Intermediate files", |
| "status_label": "Status", |
| "tab_separation": "Separation", |
| "tab_voice_settings": "Voice conversion settings", |
| "tab_mixing_settings": "Audio mixing settings", |
| |
| |
| "preclear_vocals": "Clear vocals from reverb/echo", |
| "split_vocals": "Split vocals into lead/backing", |
| "vocal_model": "Vocal model", |
| "dereverb_model": "Dereverb/Deecho model", |
| "karaoke_model": "Karaoke model", |
| |
| |
| "conversion_mode": "Conversion mode", |
| "conversion_info": "lead - lead vocals only\nback - backing vocals only\nlead/back - both vocals\nfull - full vocals", |
| "vocal_pitch": "Vocal pitch", |
| "backing_pitch": "Backing vocals pitch", |
| "pitch_method": "Pitch extraction method", |
| "max_pitch": "Max pitch detection frequency", |
| "index_rate": "Index rate", |
| "filter_radius": "Filter radius", |
| "rms_envelope": "RMS envelope", |
| "protect_cons": "Protect consonants", |
| "hop_length": "Hop length", |
| |
| |
| "volume_adjust": "Volume adjustment", |
| "vocals_gain": "Vocals", |
| "backing_gain": "Backing vocals", |
| "inst_gain": "Instrumental", |
| "output_format": "Output format", |
| "add_unconv": "Add unconverted vocals to instrumental", |
| "add_effects": "Apply effects to vocals", |
| |
| |
| "effects_tab": "Effects", |
| "echo_tab": "Echo", |
| "echo_delay": "Delay time (sec)", |
| "echo_feedback": "Feedback", |
| "echo_mix": "Mix", |
| "reverb_tab": "Reverb", |
| "reverb_size": "Room size", |
| "reverb_width": "Reverb width", |
| "reverb_wet": "Wet level", |
| "reverb_dry": "Dry level", |
| "reverb_damping": "Damping", |
| "chorus_tab": "Chorus", |
| "chorus_rate": "Rate (Hz)", |
| "chorus_depth": "Depth", |
| "chorus_delay": "Center delay (ms)", |
| "chorus_feedback": "Feedback", |
| "chorus_mix": "Mix", |
| |
| |
| "processing_tab": "Processing", |
| "compressor_tab": "Compressor", |
| "comp_ratio": "Ratio", |
| "comp_threshold": "Threshold (dB)", |
| "comp_attack": "Attack time (ms)", |
| "comp_release": "Release time (ms)", |
| "noise_gate_tab": "Noise Gate", |
| "gate_threshold": "Threshold (dB)", |
| "gate_ratio": "Ratio", |
| "gate_attack": "Attack time (ms)", |
| "gate_release": "Release time (ms)", |
| |
| |
| "start_processing": "Starting processing...", |
| "separation": "Separating music and vocals...", |
| "extracting": "Extracting lead/backing vocals...", |
| "cleaning": "Cleaning vocals...", |
| "converting": "Converting vocals...", |
| "mixing": "Mixing final cover...", |
| |
| |
| "error_audio_model": "Please upload audio and select model first", |
| "error_audio": "Please upload audio first", |
| "error_model": "Please select model first", |
| "error_generate_first": "Generate a cover at least once first!", |
| |
| "vbach_required": "Vbach (RVC) is not installed. In the notebook, run the 'Установка' cell with the flag install_vbach [✓]" |
| } |
| } |
|
|
| CURRENT_LANG = "ru" |
|
|
| def set_language(lang): |
| global CURRENT_LANG |
| CURRENT_LANG = lang |
|
|
| def t(key): |
| """Функция для получения перевода""" |
| return TRANSLATIONS[CURRENT_LANG].get(key, key) |
|
|
| def list_models(category, model_type=None): |
| list_models = [] |
| if not model_type: |
| for m_type in list(mvsepless_models.keys()): |
| for m_name in list(mvsepless_models[m_type].keys()): |
| if mvsepless_models[m_type][m_name]["category"] in category: |
| list_models.append(f"{m_type} / {m_name}") |
| else: |
| for m_type in model_type: |
| for m_name in list(mvsepless_models[m_type].keys()): |
| if mvsepless_models[m_type][m_name]["category"] in category: |
| list_models.append(f"{m_type} / {m_name}") |
| return list_models |
|
|
| def find_file_from_stem(results, stem_names=["Vocals", "vocals"]): |
| for stem_name, stem_file in results: |
| if stem_name in stem_names: |
| stem_path = stem_file |
| return stem_path |
|
|
| def mix_and_save( |
| inst_path, |
| list_vocals, |
| converted_vocals_list, |
| mix_params, |
| params, |
| rvc_params, |
| temp_dir, |
| input_audio |
| ): |
| |
| final_audio = None |
| samplerate = 44100 |
|
|
| |
| if inst_path and os.path.exists(inst_path): |
| inst_data, samplerate = librosa.load(inst_path, mono=False, sr=None, dtype='float32') |
| if inst_data.ndim == 1: |
| inst_data = np.expand_dims(inst_data, axis=0) |
| inst_gain = 10 ** (mix_params["gain"]["instrum"] / 20.0) |
| inst_data *= inst_gain |
| final_audio = inst_data.copy() |
|
|
| |
| if mix_params["add_unconverted_vocals_to_instrumental"]: |
| if params["conversion_mode"] == "lead" and list_vocals[1][1]: |
| back_vocals, _ = librosa.load(list_vocals[1][1], mono=False, sr=samplerate, dtype='float32') |
| if back_vocals.ndim == 1: |
| back_vocals = np.expand_dims(back_vocals, axis=0) |
| final_audio = back_vocals * (10 ** (mix_params["gain"]["vocals2"] / 20.0)) if final_audio is None else final_audio + back_vocals * (10 ** (mix_params["gain"]["vocals2"] / 20.0)) |
| elif params["conversion_mode"] == "back" and list_vocals[2][1]: |
| lead_vocals, _ = librosa.load(list_vocals[2][1], mono=False, sr=samplerate, dtype='float32') |
| if lead_vocals.ndim == 1: |
| lead_vocals = np.expand_dims(lead_vocals, axis=0) |
| final_audio = lead_vocals * (10 ** (mix_params["gain"]["vocals1"] / 20.0)) if final_audio is None else final_audio + lead_vocals * (10 ** (mix_params["gain"]["vocals1"] / 20.0)) |
|
|
| |
| for i, vocal_path in enumerate(converted_vocals_list): |
| if not vocal_path or not os.path.exists(vocal_path): |
| continue |
|
|
| vocal_data, sr = librosa.load(vocal_path, mono=False, sr=None, dtype='float32') |
| if vocal_data.ndim == 1: |
| vocal_data = np.expand_dims(vocal_data, axis=0) |
|
|
| |
| if sr != samplerate: |
| audio_segment = AudioSegment.from_wav(vocal_path) |
| audio_segment = audio_segment.set_frame_rate(samplerate) |
| samples = np.array(audio_segment.get_array_of_samples()) |
| channels = audio_segment.channels |
|
|
| samples = samples.astype(np.float32) |
| max_value = 2 ** (8 * audio_segment.sample_width - 1) |
| samples /= max_value |
|
|
| vocal_data = samples.reshape((-1, channels)).T |
|
|
| |
| if mix_params.get("use_effects", False): |
| board = Pedalboard() |
| effects = mix_params.get("pedalboard_settings", {}) |
|
|
| if "compressor" in effects: |
| comp = effects["compressor"] |
| board.append(Compressor( |
| ratio=comp["ratio"], |
| threshold_db=comp["threshold"], |
| attack_ms=comp["attack"], |
| release_ms=comp["release"] |
| )) |
|
|
| if "noise_gate" in effects: |
| ng = effects["noise_gate"] |
| board.append(NoiseGate( |
| threshold_db=ng["threshold"], |
| ratio=ng["ratio"], |
| attack_ms=ng["attack"], |
| release_ms=ng["release"] |
| )) |
|
|
| if "echo" in effects: |
| echo = effects["echo"] |
| board.append(Delay( |
| delay_seconds=echo["delay"], |
| feedback=echo["feedback"], |
| mix=echo["mix"] |
| )) |
|
|
| if "reverb" in effects: |
| rev = effects["reverb"] |
| board.append(Reverb( |
| room_size=rev["room_size"], |
| dry_level=rev["dry"], |
| wet_level=rev["wet"], |
| damping=rev["damping"], |
| width=rev["width"] |
| )) |
|
|
| if "chorus" in effects: |
| chorus = effects["chorus"] |
| board.append(Chorus( |
| rate_hz=chorus["rate"], |
| depth=chorus["depth"], |
| centre_delay_ms=chorus["center_delay"], |
| feedback=chorus["feedback"], |
| mix=chorus["mix"] |
| )) |
|
|
| vocal_data = board(vocal_data, samplerate) |
|
|
| |
| gain_db = mix_params["gain"]["vocals1"] if i == 0 else mix_params["gain"]["vocals2"] |
| vocal_data *= 10 ** (gain_db / 20.0) |
|
|
| |
| if final_audio is None: |
| final_audio = vocal_data.copy() |
| else: |
| |
| min_len = min(final_audio.shape[1], vocal_data.shape[1]) |
| final_audio = final_audio[:, :min_len] + vocal_data[:, :min_len] |
|
|
| |
| max_amplitude = np.max(np.abs(final_audio)) |
| if max_amplitude > 0: |
| normalization_factor = 1.0 / max_amplitude |
| final_audio = final_audio * normalization_factor |
| filename = f"{rvc_params['model_name']} - {os.path.splitext(os.path.basename(input_audio))[0]}.{params['output_format']}" if input_audio else f"remixed.{params['output_format']}" |
| final_path = os.path.join(temp_dir, filename) |
| write_audio_file(final_path, final_audio, samplerate, params['output_format'], "320k") |
|
|
| return final_path |
|
|
| def gen_cover( |
| input_audio, |
| anti_instrum_model, |
| karaoke_model, |
| dereverb_model, |
| output_format, |
| karaoke_check, |
| conversion_mode, |
| preclear_vocals_check, |
| voice_name, |
| pitch1_val, |
| pitch2_val, |
| method_pitch, |
| index_rate, |
| fr, |
| rms, |
| protect, |
| hop_mangio_crepe, |
| f0_max, |
| unconv_vocals_check, |
| use_effects, |
| instrumental_gain, |
| vocal1_gain, |
| vocal2_gain, |
| echo_delay, |
| echo_feedback, |
| echo_mix, |
| reverb_rm_size, |
| reverb_width, |
| reverb_wet, |
| reverb_dry, |
| reverb_damping, |
| chorus_rate_hz, |
| chorus_depth, |
| chorus_centre_delay_ms, |
| chorus_feedback, |
| chorus_mix, |
| compressor_ratio, |
| compressor_threshold, |
| compressor_attack, |
| compressor_release, |
| noise_gate_threshold, |
| noise_gate_ratio, |
| noise_gate_attack, |
| noise_gate_release |
| ): |
| |
| global saved_processing_data |
|
|
| if not input_audio and not voice_name: |
| raise gr.Error(t("error_audio_model")) |
|
|
| if not input_audio: |
| raise gr.Error(t("error_audio")) |
|
|
| if not voice_name: |
| raise gr.Error(t("error_model")) |
|
|
| |
| models = [ |
| anti_instrum_model, |
| karaoke_model, |
| dereverb_model |
| ] |
|
|
| params = { |
| "output_format": output_format, |
| "extract_karaoke": karaoke_check, |
| "conversion_mode": conversion_mode, |
| "preclear_vocals": preclear_vocals_check |
| } |
|
|
| rvc_params = { |
| "model_name": voice_name, |
| "pitch1": pitch1_val, |
| "pitch2": pitch2_val, |
| "f0_method": method_pitch, |
| 'index_rate': index_rate, |
| 'filter_radius': fr, |
| 'rms': rms, |
| 'protect': protect, |
| 'hop_length': hop_mangio_crepe, |
| 'f0_max': f0_max |
| } |
|
|
| mix_params = { |
| "add_unconverted_vocals_to_instrumental": unconv_vocals_check, |
| "use_effects": use_effects, |
| "gain": { |
| "instrum": instrumental_gain, |
| "vocals1": vocal1_gain, |
| "vocals2": vocal2_gain |
| }, |
| "pedalboard_settings": { |
| "echo": { |
| "delay": echo_delay, |
| "feedback": echo_feedback, |
| "mix": echo_mix |
| }, |
| "reverb": { |
| "room_size": reverb_rm_size, |
| "wet": reverb_wet, |
| "dry": reverb_dry, |
| "damping": reverb_damping, |
| "width": reverb_width, |
| }, |
| "compressor": { |
| "ratio": compressor_ratio, |
| "threshold": compressor_threshold, |
| "attack": compressor_attack, |
| "release": compressor_release |
| }, |
| "noise_gate": { |
| "threshold": noise_gate_threshold, |
| "ratio": noise_gate_ratio, |
| "attack": noise_gate_attack, |
| "release": noise_gate_release, |
| }, |
| "chorus": { |
| "rate": chorus_rate_hz, |
| "depth": chorus_depth, |
| "center_delay": chorus_centre_delay_ms, |
| "feedback": chorus_feedback, |
| "mix": chorus_mix |
| } |
| } |
| } |
|
|
| progress = gr.Progress() |
|
|
| progress(0, desc=t("start_processing")) |
|
|
| generated_files = [] |
| converted_vocals_list = [] |
| temp_dir = tempfile.mkdtemp() |
| inst_model = models[0] |
| kar_model = models[1] |
| dereverb_model = models[2] |
|
|
| progress(0.1, desc=t("separation")) |
|
|
| |
| inst_output = single_multi_inference(input_audio, os.path.join(temp_dir, "inst_output"), |
| inst_model.split(" / ")[0], inst_model.split(" / ")[1], |
| True, vr_aggr=5, output_format="wav", |
| output_bitrate="320k", template="VbachGen_NAME_STEM", |
| call_method="cli", selected_stems=[]) |
| inst_file = ("instrumental", find_file_from_stem(inst_output, ["Instrumental", "instrumental", "other", "Other"])) |
| full_vocals_file = ("full_vocals", find_file_from_stem(inst_output, ["Vocals", "vocals"])) |
|
|
| progress(0.2, desc=t("extracting")) |
|
|
| back_vocals_file = (None, None) |
| lead_vocals_file = (None, None) |
|
|
| if params["extract_karaoke"] == True: |
| karaoke_output = single_multi_inference(full_vocals_file[1], os.path.join(temp_dir, "kar_output"), |
| kar_model.split(" / ")[0], kar_model.split(" / ")[1], |
| True, vr_aggr=5, output_format="wav", |
| output_bitrate="320k", template="NAME_MODEL_STEM", |
| call_method="cli", selected_stems=[]) |
| back_vocals_file = ("back_vocals", find_file_from_stem(karaoke_output, ["Instrumental", "instrumental", "other", "Other"])) |
| lead_vocals_file = ("lead_vocals", find_file_from_stem(karaoke_output, ["Vocals", "vocals", "karaoke"])) |
|
|
| list_vocals = [ |
| full_vocals_file, |
| back_vocals_file if params["extract_karaoke"] else (None, None), |
| lead_vocals_file if params["extract_karaoke"] else (None, None) |
| ] |
|
|
| clear_list_vocals = [ |
| full_vocals_file, |
| back_vocals_file if params["extract_karaoke"] and params["conversion_mode"] in ["back", "lead/back"] else (None, None), |
| lead_vocals_file if params["extract_karaoke"] and params["conversion_mode"] in ["lead", "lead/back"] else (None, None) |
| ] |
|
|
| progress(0.3, desc=t("cleaning")) |
|
|
| |
| cleared_vocals = [] |
| if params["preclear_vocals"] == True: |
| for i, (name, file) in enumerate(clear_list_vocals): |
| if file and os.path.exists(file): |
| clear_output = single_multi_inference(file, os.path.join(temp_dir, f"cleared_output_{i}"), |
| dereverb_model.split(" / ")[0], dereverb_model.split(" / ")[1], |
| True, vr_aggr=5, output_format="wav", |
| output_bitrate="320k", template="NAME_STEM", |
| call_method="cli", selected_stems=[]) |
| cleared_file = find_file_from_stem(clear_output, ["No Echo", "No Reverb", "Dry", "Other"]) |
| cleared_vocals.append((name, cleared_file)) |
| for i, voc in enumerate(list_vocals): |
| for clear_voc in cleared_vocals: |
| if clear_voc[0] == voc[0]: |
| list_vocals[i] = clear_voc |
| break |
|
|
| progress(0.5, desc=t("converting")) |
|
|
| |
| if params["conversion_mode"] == "full" and list_vocals[0][1]: |
| full_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "full_vocals_converted.wav") |
| cmd = f"python -m vbach.cli.vbach '{list_vocals[0][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'full_vocals_converted' --pitch {rvc_params['pitch1']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['rms']} --protect {rvc_params['protect']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" |
| os.system(cmd) |
| converted_vocals_list.append(full_vocals_converted_path) |
|
|
| elif params["conversion_mode"] == "lead/back" and list_vocals[1][1] and list_vocals[2][1]: |
| lead_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "lead_vocals_converted.wav") |
| cmd = f"python -m vbach.cli.vbach '{list_vocals[2][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'lead_vocals_converted' --pitch {rvc_params['pitch1']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['rms']} --protect {rvc_params['protect']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" |
| os.system(cmd) |
|
|
| back_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "back_vocals_converted.wav") |
| cmd = f"python -m vbach.cli.vbach '{list_vocals[1][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'back_vocals_converted' --pitch {rvc_params['pitch2']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['rms']} --protect {rvc_params['protect']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" |
| os.system(cmd) |
|
|
| converted_vocals_list.append(back_vocals_converted_path) |
| converted_vocals_list.append(lead_vocals_converted_path) |
|
|
| elif params["conversion_mode"] == "back" and list_vocals[1][1]: |
| back_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "back_vocals_converted.wav") |
| cmd = f"python -m vbach.cli.vbach '{list_vocals[1][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'back_vocals_converted' --pitch {rvc_params['pitch2']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['protect']} --protect {rvc_params['rms']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" |
| os.system(cmd) |
| converted_vocals_list.append(back_vocals_converted_path) |
|
|
| elif params["conversion_mode"] == "lead" and list_vocals[2][1]: |
| lead_vocals_converted_path = os.path.join(os.path.join(temp_dir, 'converted'), "lead_vocals_converted.wav") |
| cmd = f"python -m vbach.cli.vbach '{list_vocals[2][1]}' '{os.path.join(temp_dir, 'converted')}' '{rvc_params['model_name']}' --template 'lead_vocals_converted' --pitch {rvc_params['pitch1']} --method_pitch {rvc_params['f0_method']} --index_rate {rvc_params['index_rate']} --filter_radius {rvc_params['filter_radius']} --rms {rvc_params['rms']} --protect {rvc_params['protect']} --hop_length {rvc_params['hop_length']} --f0_min 50 --f0_max {rvc_params['f0_max']} --output_format wav --stereo_mode 'mono'" |
| os.system(cmd) |
| converted_vocals_list.append(lead_vocals_converted_path) |
|
|
| |
| generated_files.append(inst_file[1]) |
| for name, file in list_vocals: |
| if file: |
| generated_files.append(file) |
| generated_files.extend(converted_vocals_list) |
|
|
| |
| saved_processing_data = { |
| "inst_path": inst_file[1], |
| "list_vocals": list_vocals, |
| "converted_vocals_list": converted_vocals_list, |
| "params": params, |
| "rvc_params": rvc_params, |
| "input_audio": input_audio |
| } |
|
|
| progress(0.9, desc=t("mixing")) |
|
|
| |
| final_path = mix_and_save( |
| inst_file[1], |
| list_vocals, |
| converted_vocals_list, |
| mix_params, |
| params, |
| rvc_params, |
| temp_dir, |
| input_audio |
| ) |
|
|
| generated_files.append(final_path) |
|
|
| return generated_files, final_path |
|
|
| def remix_cover( |
| use_effects, |
| instrumental_gain, |
| vocal1_gain, |
| vocal2_gain, |
| echo_delay, |
| echo_feedback, |
| echo_mix, |
| reverb_rm_size, |
| reverb_width, |
| reverb_wet, |
| reverb_dry, |
| reverb_damping, |
| chorus_rate_hz, |
| chorus_depth, |
| chorus_centre_delay_ms, |
| chorus_feedback, |
| chorus_mix, |
| compressor_ratio, |
| compressor_threshold, |
| compressor_attack, |
| compressor_release, |
| noise_gate_threshold, |
| noise_gate_ratio, |
| noise_gate_attack, |
| noise_gate_release |
| ): |
| global saved_processing_data |
| if not saved_processing_data: |
| raise gr.Error(t("error_generate_first")) |
|
|
| |
| data = saved_processing_data |
| temp_dir = tempfile.mkdtemp() |
|
|
| |
| mix_params = { |
| "add_unconverted_vocals_to_instrumental": True, |
| "use_effects": use_effects, |
| "gain": { |
| "instrum": instrumental_gain, |
| "vocals1": vocal1_gain, |
| "vocals2": vocal2_gain |
| }, |
| "pedalboard_settings": { |
| "echo": { |
| "delay": echo_delay, |
| "feedback": echo_feedback, |
| "mix": echo_mix |
| }, |
| "reverb": { |
| "room_size": reverb_rm_size, |
| "wet": reverb_wet, |
| "dry": reverb_dry, |
| "damping": reverb_damping, |
| "width": reverb_width, |
| }, |
| "compressor": { |
| "ratio": compressor_ratio, |
| "threshold": compressor_threshold, |
| "attack": compressor_attack, |
| "release": compressor_release |
| }, |
| "noise_gate": { |
| "threshold": noise_gate_threshold, |
| "ratio": noise_gate_ratio, |
| "attack": noise_gate_attack, |
| "release": noise_gate_release, |
| }, |
| "chorus": { |
| "rate": chorus_rate_hz, |
| "depth": chorus_depth, |
| "center_delay": chorus_centre_delay_ms, |
| "feedback": chorus_feedback, |
| "mix": chorus_mix |
| } |
| } |
| } |
|
|
| |
| final_path = mix_and_save( |
| data["inst_path"], |
| data["list_vocals"], |
| data["converted_vocals_list"], |
| mix_params, |
| data["params"], |
| data["rvc_params"], |
| temp_dir, |
| data["input_audio"] |
| ) |
|
|
| return final_path |
|
|
| def vbach_plugin_name(): |
| return "VbachGen" |
|
|
| def vbachgen_plugin(lang): |
| set_language(lang) |
| with gr.Blocks(): |
| if os.path.exists("vbach"): |
| with gr.Row(equal_height=False, variant="panel"): |
| with gr.Column(): |
| model_name = gr.Dropdown(label=t("model_name_label"), interactive=True, filterable=False, scale=6) |
| model_update_btn = gr.Button(t("update_button"), variant="primary", scale=3, size="lg") |
| with gr.Row(min_height=150): |
| input_audio = gr.File(label=t("input_audio_label"), interactive=True, type="filepath", file_count="single") |
| |
| with gr.Row(): |
| with gr.Column(): |
| |
| with gr.Tab(t("tab_separation")): |
| preclear_vocals_check = gr.Checkbox(label=t("preclear_vocals"), value=False) |
| karaoke_check = gr.Checkbox(label=t("split_vocals"), value=False) |
| |
| with gr.Column(variant="panel"): |
| with gr.Group() as extract_vocals_group: |
| anti_instrum_model = gr.Dropdown( |
| label=t("vocal_model"), |
| choices=list_models(["Инструментал", "Вокал", "Инструментал и вокал"], ["mel_band_roformer", "bs_roformer", "mdx23c", "mdx", "htdemucs"]), |
| interactive=True, |
| filterable=False |
| ) |
| |
| with gr.Group(visible=False) as deecho_group: |
| dereverb_model = gr.Dropdown( |
| label=t("dereverb_model"), |
| choices=list_models(["Реверб и эхо", "Реверб", "Эхо"], ["vr"]), |
| interactive=True, |
| filterable=False |
| ) |
| |
| with gr.Group(visible=False) as karaoke_group: |
| karaoke_model = gr.Dropdown( |
| label=t("karaoke_model"), |
| choices=list_models(["Караоке"]), |
| interactive=True, |
| filterable=False |
| ) |
| |
| |
| with gr.Tab(t("tab_voice_settings")): |
| conversion_mode = gr.Dropdown( |
| label=t("conversion_mode"), |
| choices=["lead", "back", "lead/back", "full"], |
| value="full", |
| filterable=False, |
| visible=False, |
| info=t("conversion_info") |
| ) |
| with gr.Row(): |
| pitch1 = gr.Slider(-48, 48, value=0, step=12, label=t("vocal_pitch"), interactive=True) |
| pitch2 = gr.Slider(-48, 48, value=0, step=12, label=t("backing_pitch"), visible=False, interactive=True) |
| with gr.Row(): |
| method_pitch = gr.Dropdown( |
| label=t("pitch_method"), |
| choices=["mangio-crepe", "rmvpe+", "fcpe"], |
| value="rmvpe+", |
| interactive=True, |
| filterable=False |
| ) |
| f0_max = gr.Slider(50, 2000, value=1100, step=50, label=t("max_pitch"), interactive=True) |
| with gr.Row(): |
| with gr.Column(scale=1): |
| index_rate = gr.Slider(0, 1, value=0, step=0.1, label=t("index_rate"), interactive=True) |
| fr = gr.Slider(0, 7, value=3, step=1, label=t("filter_radius"), interactive=True) |
| with gr.Column(scale=1): |
| rms = gr.Slider(0, 1, value=0.25, step=0.05, label=t("rms_envelope"), interactive=True) |
| protect = gr.Slider(minimum=0, maximum=0.5, step=0.01, value=0.33, label=t("protect_cons"), interactive=True) |
| hop_mangio_crepe = gr.Slider(1, 512, value=128, step=1, label=t("hop_length"), interactive=True, visible=False) |
| |
| |
| with gr.Tab(t("tab_mixing_settings")): |
| gr.Markdown(f"<center><h2>{t('volume_adjust')}</h2></center>") |
| with gr.Row(variant="panel"): |
| vocal1_gain = gr.Slider(-30, 30, value=0, step=1, label=t("vocals_gain"), scale=3, interactive=True) |
| vocal2_gain = gr.Slider(-30, 30, value=0, step=1, label=t("backing_gain"), scale=3, visible=False, interactive=True) |
| instrumental_gain = gr.Slider(-30, 30, value=0, step=1, label=t("inst_gain"), scale=3, interactive=True) |
| |
| output_format = gr.Dropdown( |
| label=t("output_format"), |
| choices=OUTPUT_FORMAT, |
| value="wav", |
| interactive=True, |
| filterable=False |
| ) |
| unconv_vocals_check = gr.Checkbox(label=t("add_unconv"), visible=False) |
| use_effects = gr.Checkbox(label=t("add_effects"), value=False) |
| with gr.Column(variant="panel", visible=False) as effects_accordion: |
| with gr.Tab(t("effects_tab")): |
| with gr.Tab(t("echo_tab")): |
| with gr.Group(): |
| with gr.Column(variant="panel"): |
| with gr.Row(): |
| echo_delay = gr.Slider(0, 3, value=0, label=t("echo_delay"), interactive=True) |
| echo_feedback = gr.Slider(0, 1, value=0, label=t("echo_feedback"), interactive=True) |
| echo_mix = gr.Slider(0, 1, value=0, label=t("echo_mix"), interactive=True) |
| |
| with gr.Tab(t("reverb_tab")): |
| with gr.Group(): |
| with gr.Column(variant="panel"): |
| with gr.Row(): |
| reverb_rm_size = gr.Slider(0, 1, value=0.1, label=t("reverb_size"), interactive=True) |
| reverb_width = gr.Slider(0, 1, value=1.0, label=t("reverb_width"), interactive=True) |
| with gr.Row(): |
| reverb_wet = gr.Slider(0, 1, value=0.1, label=t("reverb_wet"), interactive=True) |
| reverb_dry = gr.Slider(0, 1, value=0.8, label=t("reverb_dry"), interactive=True) |
| with gr.Row(): |
| reverb_damping = gr.Slider(0, 1, value=0.9, label=t("reverb_damping"), interactive=True) |
| |
| with gr.Tab(t("chorus_tab")): |
| with gr.Group(): |
| with gr.Column(variant="panel"): |
| with gr.Row(): |
| chorus_rate_hz = gr.Slider(0.1, 10, value=0, label=t("chorus_rate"), interactive=True) |
| chorus_depth = gr.Slider(0, 1, value=0, label=t("chorus_depth"), interactive=True) |
| with gr.Row(): |
| chorus_centre_delay_ms = gr.Slider(0, 50, value=0, label=t("chorus_delay"), interactive=True) |
| chorus_feedback = gr.Slider(0, 1, value=0, label=t("chorus_feedback"), interactive=True) |
| with gr.Row(): |
| chorus_mix = gr.Slider(0, 1, value=0, label=t("chorus_mix"), interactive=True) |
| |
| with gr.Tab(t("processing_tab")): |
| with gr.Tab(t("compressor_tab")): |
| with gr.Row(variant="panel"): |
| compressor_ratio = gr.Slider(1, 20, value=4, label=t("comp_ratio"), interactive=True) |
| compressor_threshold = gr.Slider(-60, 0, value=-12, label=t("comp_threshold"), interactive=True) |
| compressor_attack = gr.Slider(0, 2000, value=100, label=t("comp_attack"), interactive=True) |
| compressor_release = gr.Slider(0, 2000, value=100, label=t("comp_release"), interactive=True) |
| |
| with gr.Tab(t("noise_gate_tab")): |
| with gr.Group(): |
| with gr.Column(variant="panel"): |
| with gr.Row(): |
| noise_gate_threshold = gr.Slider(-60, 0, value=-40, label=t("gate_threshold"), interactive=True) |
| noise_gate_ratio = gr.Slider(1, 20, value=8, label=t("gate_ratio"), interactive=True) |
| with gr.Row(): |
| noise_gate_attack = gr.Slider(0, 100, value=10, label=t("gate_attack"), interactive=True) |
| noise_gate_release = gr.Slider(0, 1000, value=100, label=t("gate_release"), interactive=True) |
| |
| |
| with gr.Column(variant="panel"): |
| final_ai_cover = gr.Audio(label=t("final_result"), interactive=False, streaming=True) |
| generated_files_list = gr.Files(label=t("intermediate_files")) |
| with gr.Row(): |
| generate_btn = gr.Button(t("generate_btn"), variant="primary") |
| remix_btn = gr.Button(t("remix_btn"), variant="secondary") |
| status_text = gr.Textbox(label=t("status_label"), interactive=False) |
| |
| |
| method_pitch.change(fn=lambda x: gr.update(visible=True if x == "mangio-crepe" else False), inputs=method_pitch, outputs=hop_mangio_crepe) |
| |
| model_update_btn.click(fn=(lambda : gr.update(choices=[d for d in os.listdir(RVC_MODELS_DIR) if os.path.isdir(os.path.join(RVC_MODELS_DIR, d))])), inputs=None, outputs=model_name) |
| |
| |
| use_effects.change( |
| fn=lambda x: gr.update(visible=x), |
| inputs=use_effects, |
| outputs=effects_accordion |
| ) |
| |
| karaoke_check.change( |
| fn=lambda x: gr.update(visible=x), |
| inputs=karaoke_check, |
| outputs=karaoke_group |
| ).then(fn=lambda x: gr.update(value="full", visible=x), inputs=karaoke_check, outputs=conversion_mode).then(fn=lambda x: gr.update(visible=True if x in ["back", "lead"] else False, value=False), inputs=conversion_mode, outputs=unconv_vocals_check) |
| |
| preclear_vocals_check.change( |
| fn=lambda x: gr.update(visible=x), |
| inputs=preclear_vocals_check, |
| outputs=deecho_group |
| ) |
| |
| conversion_mode.change( |
| fn=lambda mode: ( |
| gr.update(visible=mode in ["lead", "lead/back"]), |
| gr.update(visible=mode in ["back", "lead/back"]), |
| gr.update(visible=mode in ["lead/back"]) |
| ), |
| inputs=conversion_mode, |
| outputs=[vocal1_gain, vocal2_gain, pitch2] |
| ).then(fn=lambda x: gr.update(visible=True if x in ["back", "lead"] else False, value=False), inputs=conversion_mode, outputs=unconv_vocals_check) |
| |
| generate_btn.click( |
| fn=gen_cover, |
| inputs=[ |
| input_audio, |
| anti_instrum_model, |
| karaoke_model, |
| dereverb_model, |
| output_format, |
| karaoke_check, |
| conversion_mode, |
| preclear_vocals_check, |
| model_name, |
| pitch1, |
| pitch2, |
| method_pitch, |
| index_rate, |
| fr, |
| rms, |
| protect, |
| hop_mangio_crepe, |
| f0_max, |
| unconv_vocals_check, |
| use_effects, |
| instrumental_gain, |
| vocal1_gain, |
| vocal2_gain, |
| echo_delay, |
| echo_feedback, |
| echo_mix, |
| reverb_rm_size, |
| reverb_width, |
| reverb_wet, |
| reverb_dry, |
| reverb_damping, |
| chorus_rate_hz, |
| chorus_depth, |
| chorus_centre_delay_ms, |
| chorus_feedback, |
| chorus_mix, |
| compressor_ratio, |
| compressor_threshold, |
| compressor_attack, |
| compressor_release, |
| noise_gate_threshold, |
| noise_gate_ratio, |
| noise_gate_attack, |
| noise_gate_release |
| ], |
| outputs=[generated_files_list, final_ai_cover] |
| ) |
| |
| remix_btn.click( |
| fn=remix_cover, |
| inputs=[ |
| use_effects, |
| instrumental_gain, |
| vocal1_gain, |
| vocal2_gain, |
| echo_delay, |
| echo_feedback, |
| echo_mix, |
| reverb_rm_size, |
| reverb_width, |
| reverb_wet, |
| reverb_dry, |
| reverb_damping, |
| chorus_rate_hz, |
| chorus_depth, |
| chorus_centre_delay_ms, |
| chorus_feedback, |
| chorus_mix, |
| compressor_ratio, |
| compressor_threshold, |
| compressor_attack, |
| compressor_release, |
| noise_gate_threshold, |
| noise_gate_ratio, |
| noise_gate_attack, |
| noise_gate_release |
| ], |
| outputs=[final_ai_cover] |
| ) |
| else: |
| gr.Markdown(f"<center><h2>{t('vbach_required')}</h2></center>") |