| import gradio as gr |
| import json |
| import pandas as pd |
| import tempfile |
| import os |
| from separator.ensemble import ensemble_audio_files |
| from pydub.utils import mediainfo |
| from pydub import AudioSegment |
| import numpy as np |
| import librosa |
| import librosa.display |
| import soundfile as sf |
| from separator.audio_writer import write_audio_file |
| from multi_inference import MVSEPLESS |
| from pydub.exceptions import CouldntDecodeError |
|
|
# Shared separator backend instance used by every ensemble operation in this module.
mvsepless = MVSEPLESS()
|
|
# UI string tables keyed by two-letter language code; looked up via t().
# Values may contain {placeholders} that t() fills with keyword arguments.
TRANSLATIONS = {
    "ru": {
        "app_title": "EnsembLess",
        "auto_ensemble": "Авто-ансамбль",
        "invert_ensemble": "Инвертировать ансамбль",
        "give_name_preset": "Дайте имя пресету",
        "export": "Экспорт",
        "import": "Импорт",
        "manual_ensemble": "Ручной ансамбль",
        "inverter": "Инвертер",
        "model_selection": "Выберите модель для добавления в ансамбль",
        "model_type": "Тип модели",
        "model_name": "Имя модели",
        "stem_selection": "Стем, который будет использован в ансамбле",
        "weight": "Весы",
        "invert_weights": "Использовать перевернутые весы для инвертированного стема",
        "add_button": "➕ Добавить",
        "current_ensemble": "Текущий ансамбль",
        "remove_index": "Индекс модели, который хотите удалить (начинается с 1)",
        "remove_button": "❌ Удалить",
        "clear_button": "Очистить",
        "input_audio": "Входное аудио",
        "settings": "Настройки",
        "method": "Метод",
        "output_format": "Формат вывода",
        "run_button": "Создать ансамбль",
        "results": "Результаты",
        "inverted_result": "Инвертированный результат",
        "invert_method": "Метод инвертирования",
        "invert_button": "Инвертировать",
        "audio_files": "Аудио файлы",
        "weights_input": "Весы",
        "main_audio": "Основное аудио",
        "audio_to_remove": "Аудио для удаления",
        "processing_method": "Метод обработки",
        "analyze_title": "РЕЗУЛЬТАТЫ АНАЛИЗА:",
        "all_same_rate": "✅ ВСЕ ФАЙЛЫ имеют одинаковую частоту дискретизации: {rate} Hz",
        "different_rates": "⚠️ Файлы имеют РАЗНУЮ частоту дискретизации",
        "resample_warning": "К загруженному аудио автоматически применён ресэмплинг для лучшего инвертирования",
        "error_no_files": "Ошибка: файлы не загружены",
        "error_unsupported_format": "не поддерживаемый формат",
        "error_general": "ошибка ({error})",
        "error_no_models": "Добавьте хотя бы одну модель для создания ансамбля",
        "error_no_audio": "Сначала загрузите аудио",
        "error_both_audio": "Пожалуйста, загрузите оба аудиофайла",
        "language": "Язык",
        "batch_processing": "Пакетная обработка",
        "batch_info": "Позволяет загрузить сразу несколько файлов",
        "separation_info": "Информация о разделении",
        "vocal_separation": "Разделение вокалы",
        "stereo_mode": "Стерео режим",
        "stem": "Стем",
        "p_stem": "Основной стем",
        "s_stem": "Инвертированный стем",
        "vocal_multi_separation": "Мульти-вокал",
        "ensemble": "Ансамбль",
        "transform": "Преобразование",
        "algorithm": "Алгоритм: {model_fullname}",
        "output_format_info": "Формат выходных данных: {output_format}",
        "process1": "Начало обработки",
        "process2": "Модель",
        "process3": "Автоматическое выравнивание длин аудио",
        "process4": "Создание ансамбля",
        "result_source": "Промежуточные файлы",
        "local_path": "Указать путь к аудио локально",
        "resample": "Ресэмпл"
    },
    "en": {
        "app_title": "EnsembLess",
        "auto_ensemble": "Auto-Ensemble",
        "invert_ensemble": "Invert ensemble",
        "give_name_preset": "Give name of preset",
        "export": "Export",
        "import": "Import",
        "manual_ensemble": "Manual Ensemble",
        "inverter": "Inverter",
        "model_selection": "Select a model to add to the ensemble",
        "model_type": "Model Type",
        "model_name": "Model Name",
        "stem_selection": "Stem to use in the ensemble",
        "weight": "Weights",
        "invert_weights": "Use inverted weights for inverted stem",
        "add_button": "➕ Add",
        "current_ensemble": "Current Ensemble",
        "remove_index": "Index of model to remove (starts from 1)",
        "remove_button": "❌ Remove",
        "clear_button": "Clear",
        "input_audio": "Input Audio",
        "settings": "Settings",
        "method": "Method",
        "output_format": "Output Format",
        "run_button": "Create Ensemble",
        "results": "Results",
        "inverted_result": "Inverted Result",
        "invert_method": "Inversion Method",
        "invert_button": "Invert",
        "audio_files": "Audio Files",
        "weights_input": "Weights",
        "main_audio": "Main Audio",
        "audio_to_remove": "Audio to Remove",
        "processing_method": "Processing Method",
        "analyze_title": "ANALYSIS RESULTS:",
        "all_same_rate": "✅ ALL FILES have the same sample rate: {rate} Hz",
        "different_rates": "⚠️ Files have DIFFERENT sample rates",
        "resample_warning": "Resampling applied automatically for better inversion",
        "error_no_files": "Error: no files uploaded",
        "error_unsupported_format": "unsupported format",
        "error_general": "error ({error})",
        "error_no_models": "Add at least one model to create an ensemble",
        "error_no_audio": "Please upload audio first",
        "error_both_audio": "Please upload both audio files",
        "language": "Language",
        "batch_processing": "Batch Processing",
        "batch_info": "Allows uploading multiple files at once",
        "separation_info": "Separation Info",
        "vocal_separation": "Vocal Separation",
        "stereo_mode": "Stereo Mode",
        "stem": "Stem",
        "p_stem": "Primary stem",
        "s_stem": "Secondary stem",
        "vocal_multi_separation": "Multi-Vocal",
        "ensemble": "Ensemble",
        "transform": "Transform",
        "algorithm": "Algorithm: {model_fullname}",
        "output_format_info": "Output format: {output_format}",
        "process1": "Start process",
        "process2": "Model",
        "process3": "Auto post-padding audios",
        "process4": "Build ensemble",
        "result_source": "Intermediate files",
        "local_path": "Specify path to audio locally",
        "resample": "Resample"
    }
}
|
|
# Maps each ensemble method to the method used for the *inverted* ensemble:
# min <-> max are swapped (complementary extremes), while median/avg methods
# are their own inverse and map to themselves.
INVERT_METHODS = {
    "min_fft": "max_fft",
    "max_fft": "min_fft",
    "min_wave": "max_wave",
    "max_wave": "min_wave",
    "median_fft": "median_fft",
    "median_wave": "median_wave",
    "avg_fft": "avg_fft",
    "avg_wave": "avg_wave"
}
|
|
| |
# Active UI language code; mutated by set_language() and read by t().
CURRENT_LANG = "ru"


def set_language(lang):
    """Set the module-wide UI language used by t()."""
    global CURRENT_LANG
    CURRENT_LANG = lang
|
|
def t(key, **kwargs):
    """Return the translation for *key* in the current language.

    Unknown keys fall back to the key itself; keyword arguments, if any,
    are substituted into the string's {placeholders}.
    """
    text = TRANSLATIONS[CURRENT_LANG].get(key, key)
    if kwargs:
        return text.format(**kwargs)
    return text
|
|
|
|
| |
# STFT analysis parameters for spectrogram-domain inversion.
N_FFT = 2048
WIN_LENGTH = 2048
HOP_LENGTH = WIN_LENGTH // 4  # 75% window overlap
|
|
class Inverter:
    """Subtract one track from another to recover a residual stem.

    Two modes: plain waveform subtraction, or magnitude subtraction in the
    STFT domain (the phase of the main signal is kept for resynthesis).
    """

    def __init__(self):
        # Placeholder attribute kept from the original implementation.
        self.test = "test"

    def load_audio(self, filepath):
        """Load *filepath* with librosa at its native sample rate.

        Returns (samples, sample_rate), or (None, None) when the path is
        missing or the file cannot be decoded.
        """
        if filepath is None:
            return None, None
        try:
            return librosa.load(filepath, sr=None, mono=False)
        except Exception as e:
            print(f"Ошибка загрузки аудио: {e}")
            return None, None

    def process_channel(self, y1_ch, y2_ch, sr, method):
        """Subtract a single channel of y2 from the matching channel of y1.

        method == "waveform": direct sample-wise difference.
        method == "spectrogram": subtract STFT magnitudes (clamped at zero)
        and resynthesize with the phase of the main signal.
        """
        if method == "waveform":
            return y1_ch - y2_ch

        elif method == "spectrogram":
            S1 = librosa.stft(y1_ch, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH)
            S2 = librosa.stft(y2_ch, n_fft=N_FFT, hop_length=HOP_LENGTH, win_length=WIN_LENGTH)

            mag1 = np.abs(S1)
            mag2 = np.abs(S2)

            # Magnitude can never be negative: clamp the difference at zero.
            mag_result = np.maximum(mag1 - mag2, 0)

            # Reuse the phase of the main signal for resynthesis.
            phase = np.angle(S1)
            S_result = mag_result * np.exp(1j * phase)

            return librosa.istft(
                S_result,
                n_fft=N_FFT,
                hop_length=HOP_LENGTH,
                win_length=WIN_LENGTH,
                length=len(y1_ch)
            )

    def process_audio(self, audio1_path, audio2_path, out_format, method):
        """Subtract audio2 from audio1 and write the result next to audio2.

        Returns (path_in_requested_format, path_to_wav_copy).
        Raises gr.Error when either file cannot be loaded.
        """
        y1, sr1 = self.load_audio(audio1_path)
        y2, sr2 = self.load_audio(audio2_path)

        if sr1 is None or sr2 is None:
            raise gr.Error(t("error_both_audio"))

        channels1 = 1 if y1.ndim == 1 else y1.shape[0]
        channels2 = 1 if y2.ndim == 1 else y2.shape[0]

        # Normalize both signals to shape (frames, channels).
        y1 = y1.T if channels1 > 1 else y1.reshape(-1, 1)
        y2 = y2.T if channels2 > 1 else y2.reshape(-1, 1)

        # Bring y2 to the sample rate of the main signal.
        if sr1 != sr2:
            if channels2 > 1:
                # BUGFIX: librosa.resample changes the number of samples, so
                # the old code, which wrote into a zeros buffer pre-sized to
                # the *original* length, raised a shape mismatch whenever the
                # rates differed. Resample each channel and stack the results.
                resampled = [
                    librosa.resample(y2[:, c], orig_sr=sr2, target_sr=sr1)
                    for c in range(channels2)
                ]
                shortest = min(len(ch) for ch in resampled)
                y2 = np.stack([ch[:shortest] for ch in resampled], axis=1).astype(np.float32)
            else:
                y2 = librosa.resample(y2[:, 0], orig_sr=sr2, target_sr=sr1)
                y2 = y2.reshape(-1, 1)
            sr2 = sr1

        # Trim both signals to the common length.
        min_len = min(len(y1), len(y2))
        y1 = y1[:min_len]
        y2 = y2[:min_len]

        result_channels = []

        # Mono main signal with a multi-channel subtrahend: downmix the latter.
        if channels1 == 1 and channels2 > 1:
            y2 = y2.mean(axis=1, keepdims=True)
            channels2 = 1

        for c in range(channels1):
            y1_ch = y1[:, c]
            # When y2 has fewer channels, reuse its last available channel.
            y2_ch = y2[:, 0] if channels2 == 1 else y2[:, min(c, channels2 - 1)]
            result_channels.append(self.process_channel(y1_ch, y2_ch, sr1, method))

        if len(result_channels) > 1:
            result = np.column_stack(result_channels)
        else:
            result = np.array(result_channels[0])

        # Peak-normalize each channel to 0.9 to avoid clipping on export.
        if result.ndim > 1:
            for c in range(result.shape[1]):
                channel = result[:, c]
                max_val = np.max(np.abs(channel))
                if max_val > 0:
                    result[:, c] = channel * 0.9 / max_val
        else:
            max_val = np.max(np.abs(result))
            if max_val > 0:
                result = result * 0.9 / max_val

        folder_path = os.path.dirname(audio2_path)

        # Always keep a lossless WAV copy alongside the requested format.
        inverted_wav = os.path.join(folder_path, "inverted.wav")
        sf.write(inverted_wav, result, sr1)
        inverted = os.path.join(folder_path, f"inverted_ensemble.{out_format}")
        write_audio_file(inverted, result.T, sr1, out_format, "320k")
        return inverted, inverted_wav
| |
class EnsembLess:
    """Backend for the ensemble tabs: model queries, audio preparation and ensembling."""

    def __init__(self):
        # Placeholder attribute kept from the original implementation.
        self.test = "test"

    def get_model_types(self):
        """Available separation model families (e.g. vr, mdx, ...)."""
        return mvsepless.get_mt()

    def get_models_by_type(self, model_type):
        """Model names available for the given model family."""
        return mvsepless.get_mn(model_type)

    def get_stems_by_model(self, model_type, model_name):
        """Stems the model produces, plus synthetic 'instrumental +/-' entries
        for full multi-stem models without a dedicated target instrument.
        """
        stems = mvsepless.get_stems(model_type, model_name)
        # BUGFIX: the original expression relied on `and` binding tighter than
        # `or`, which skipped the get_tgt_inst() check for 4-stem models;
        # the check now applies to both stem sets.
        is_multistem = set(stems) in ({"bass", "drums", "vocals", "other"},
                                      {"bass", "drums", "vocals", "other", "piano", "guitar"})
        if is_multistem and not mvsepless.get_tgt_inst(model_type, model_name):
            stems.append("instrumental +")
            stems.append("instrumental -")
        return stems

    def get_invert_stems_by_model(self, model_type, model_name, primary_stem):
        """Stems that may serve as the inverted counterpart of *primary_stem*."""
        invert_stems = [s for s in mvsepless.get_stems(model_type, model_name) if s != primary_stem]
        # Models without a fixed target instrument (outside the VR/MDX
        # families) additionally support synthetic inverted stems.
        if not mvsepless.get_tgt_inst(model_type, model_name) and model_type not in ["vr", "mdx"]:
            invert_stems.append("inverted +")
            invert_stems.append("inverted -")
        return invert_stems

    def invert_weights(self, weights):
        """Mirror each weight around the total, so heavily-weighted primary
        stems receive lightly-weighted inverted counterparts."""
        total_weight = sum(weights)
        return [total_weight - w for w in weights]

    def analyze_sample_rate(self, files):
        """Report the sample rate of every uploaded file and whether they all match.

        Returns a formatted multi-line string for the info textbox.
        """
        if not files:
            return t("error_no_files")

        results = []
        common_rate = None
        all_same = True

        for file_info in files:
            display_name = os.path.basename(file_info.name)
            try:
                rate = AudioSegment.from_file(file_info.name).frame_rate

                if common_rate is None:
                    common_rate = rate
                elif common_rate != rate:
                    all_same = False

                results.append(f"{display_name}: {rate} Hz")

            except CouldntDecodeError:
                results.append(f"{display_name}: {t('error_unsupported_format')}")
            except Exception as e:
                results.append(f"{display_name}: {t('error_general', error=str(e))}")

        header = t("analyze_title") + "\n" + "-" * 50 + "\n"
        body = "\n".join(results)
        footer = "\n" + "-" * 50 + "\n"

        if all_same and common_rate is not None:
            footer += f"\n{t('all_same_rate', rate=common_rate)}"
        elif common_rate is not None:
            footer += f"\n{t('different_rates')}"

        return header + body + footer

    def resample_audio(self, audio_path):
        """Resample *audio_path* to a 44100 Hz PCM-16 WAV next to the source file.

        Returns the new path, or None (with a UI warning) when the path is invalid.
        """
        if not audio_path or not os.path.isfile(audio_path):
            gr.Warning(t("error_no_audio"))
            return None

        original_name = os.path.splitext(os.path.basename(audio_path))[0]
        folder_path = os.path.dirname(audio_path)
        resampled_path = os.path.join(folder_path, f"resampled_{original_name}.wav")

        target_sr = 44100

        y, orig_sr = librosa.load(audio_path, sr=None, mono=False)

        # Normalize to shape (frames, channels).
        if y.ndim == 1:
            channels = 1
            y = y.reshape(-1, 1)
        else:
            channels = y.shape[0]
            y = y.T

        if orig_sr != target_sr:
            resampled_channels = [
                librosa.resample(
                    y=y[:, channel],
                    orig_sr=orig_sr,
                    target_sr=target_sr,
                    res_type="kaiser_best"
                )
                for channel in range(channels)
            ]
            # Per-channel lengths can differ by a sample; trim to the shortest.
            min_length = min(len(c) for c in resampled_channels)
            resampled_data = np.vstack([c[:min_length] for c in resampled_channels]).T
        else:
            resampled_data = y

        sf.write(
            resampled_path,
            resampled_data,
            target_sr,
            subtype="PCM_16"
        )

        gr.Warning(message=t("resample_warning"))
        return resampled_path

    def _pad_to_longest(self, entries):
        """Zero-pad each (path, (2, n) data, sr) entry to the longest length,
        overwriting the files in place; returns the list of file paths."""
        if not entries:
            return []
        max_length = max(data.shape[1] for _, data, _ in entries)
        padded_files = []
        for file, data, sr in entries:
            if data.shape[1] < max_length:
                pad_width = ((0, 0), (0, max_length - data.shape[1]))
                data = np.pad(data, pad_width, mode='constant')
            # BUGFIX: write each file with its *own* sample rate; the original
            # reused the sample rate of the last loaded file for every write.
            sf.write(file, data.T, sr)
            padded_files.append(file)
        return padded_files

    def maximize_length_audio(self, output):
        """Pad every file in *output* (any decodable format) to a common length."""
        entries = []
        for file in output:
            data, sr = librosa.load(file, sr=None, mono=False)
            if data.ndim == 1:
                data = np.stack([data, data])  # duplicate mono into stereo
            elif data.shape[0] != 2:
                data = data.T
            entries.append((file, data, sr))
        return self._pad_to_longest(entries)

    def maximize_length_audio_wav(self, output):
        """Pad every WAV file in *output* to a common length (soundfile loader)."""
        entries = []
        for file in output:
            data, sr = sf.read(file)
            if data.ndim == 1:
                data = np.stack([data, data])
            elif data.shape[0] != 2:
                data = data.T  # soundfile returns (frames, channels)
            entries.append((file, data, sr))
        return self._pad_to_longest(entries)

    def manual_ensemble(self, input_audios, method, weights, out_format):
        """Ensemble user-supplied files; *weights* is a comma-separated string."""
        temp_dir = tempfile.mkdtemp()
        weights = [float(x) for x in weights.split(",")]

        a1, a2 = ensemble_audio_files(input_audios, output=os.path.join(temp_dir, f"ensemble_{method}"), ensemble_type=method, weights=weights, out_format=out_format)
        return a1, a2

    def auto_ensemble(self, input_audio, input_settings, type, out_format, invert_weights, invert_ensemble):
        """Run every configured model, collect the chosen stems and ensemble them.

        Returns (ensemble, ensemble_wav, inverted_ensemble, inverted_wav,
        source_files); the inverted pair is (None, None) when invert_ensemble
        is False.
        """
        progress = gr.Progress()
        progress(0, desc=f"{t('process1')}...")

        base_name = os.path.splitext(os.path.basename(input_audio))[0]
        temp_dir = tempfile.mkdtemp()
        source_files = []
        output_p_files = []
        output_s_files = []
        output_p_weights = []

        block_count = len(input_settings)

        for i, (input_model, weight, p_stem, s_stem) in enumerate(input_settings):
            output_s_files.append(None)
            progress(i / block_count, desc=f"{t('process2')} {i+1}/{block_count}")
            model_type, model_name = input_model.split(" / ")
            output_dir_p = os.path.join(temp_dir, f"{model_type}_{model_name}_p_stems")
            output_p = mvsepless.separator(input_file=input_audio, output_dir=output_dir_p, model_type=model_type, model_name=model_name, ext_inst=True, vr_aggr=10, output_format="wav", template="MODEL_STEM", call_method="cli", selected_stems=None) if False else mvsepless.separator(input_file=input_audio, output_dir=output_dir_p, model_type=model_type, model_name=model_name, ext_inst=True, vr_aggr=10, output_format="wav", template="MODEL_STEM", call_method="cli")
            for stem, file in output_p:
                source_files.append(file)
                if stem == p_stem:
                    output_p_files.append(file)
                    output_p_weights.append(weight)
                elif invert_ensemble and stem == s_stem:
                    output_s_files[i] = file

            # The inverted stem did not come out of the first pass: run the
            # model again requesting it explicitly.
            if invert_ensemble and not output_s_files[i]:
                output_dir_s = os.path.join(temp_dir, f"{model_type}_{model_name}_s_stems")
                output_s = mvsepless.separator(input_file=input_audio, output_dir=output_dir_s, model_type=model_type, model_name=model_name, ext_inst=True, vr_aggr=10, output_format="wav", template="MODEL_STEM", call_method="cli", selected_stems=[p_stem if not mvsepless.get_tgt_inst(model_type, model_name) else "both"])
                for stem, file in output_s:
                    # BUGFIX: the original appended the matching file to
                    # source_files a second time; append once per file.
                    source_files.append(file)
                    if stem == s_stem:
                        output_s_files[i] = file

        progress(0.9, desc=f"{t('process3')}...")

        progress(0.95, desc=f"{t('process4')}...")
        if invert_ensemble:
            if invert_weights:
                output_s_weights = self.invert_weights(output_p_weights)
            else:
                output_s_weights = output_p_weights
            output_s, output_wav_s = ensemble_audio_files(files=output_s_files, output=os.path.join(temp_dir, f"ensemble_invert_{base_name}_{type}"), ensemble_type=INVERT_METHODS[type], weights=output_s_weights, out_format=out_format)
        else:
            output_s, output_wav_s = None, None

        output_p, output_wav_p = ensemble_audio_files(files=output_p_files, output=os.path.join(temp_dir, f"ensemble_{base_name}_{type}"), ensemble_type=type, weights=output_p_weights, out_format=out_format)

        return output_p, output_wav_p, output_s, output_wav_s, source_files
|
|
class EnsembleManager:
    """Keeps the list of models composing the current ensemble and
    serializes it to / from JSON preset files under ./presets."""

    def __init__(self):
        # Each entry: {'type', 'name', 'p_stem', 's_stem', 'weight'}.
        self.models = []
        self.presets_dir = os.path.join(os.getcwd(), "presets")
        os.makedirs(self.presets_dir, exist_ok=True)

    def export_preset(self, name):
        """Dump the current model list to presets/<name>.json; returns the path."""
        if not name:
            name = "ensembless_preset"
        filepath = os.path.join(self.presets_dir, f"{name}.json")
        # utf-8 + ensure_ascii=False keep non-ASCII model/stem names intact
        # regardless of the platform's default encoding.
        with open(filepath, 'w', encoding='utf-8') as f:
            json.dump(self.models, f, ensure_ascii=False)
        return filepath

    def import_preset(self, filepath):
        """Replace the current model list with the one stored in *filepath*."""
        with open(filepath, 'r', encoding='utf-8') as f:
            self.models = json.load(f)
        return self.get_df()

    def add_model(self, model_type, model_name, p_stem, s_stem, weight):
        """Append one model entry and return the refreshed table."""
        self.models.append({
            'type': model_type,
            'name': model_name,
            'p_stem': p_stem,
            's_stem': s_stem,
            'weight': float(weight)
        })
        return self.get_df()

    def remove_model(self, index):
        """Remove the entry at zero-based *index* (ignored when out of range)."""
        if 0 <= index < len(self.models):
            del self.models[index]
        return self.get_df()

    def clear_models(self):
        """Drop every queued model and return the (empty) table."""
        self.models = []
        return self.get_df()

    def _columns(self):
        # Localized column headers; rebuilt on every call so a language
        # switch is reflected immediately.
        return ["#", t("model_type"), t("model_name"), t("p_stem"), t("s_stem"), t("weight")]

    def get_df(self):
        """Current ensemble as a DataFrame for the Gradio table."""
        if not self.models:
            return pd.DataFrame(columns=self._columns())

        rows = [
            [f"{i+1}", m['type'], m['name'], m['p_stem'], m['s_stem'], m['weight']]
            for i, m in enumerate(self.models)
        ]
        return pd.DataFrame(rows, columns=self._columns())

    def get_settings(self):
        """Settings tuples consumed by EnsembLess.auto_ensemble()."""
        return [(f"{m['type']} / {m['name']}", m['weight'], m['p_stem'], m['s_stem']) for m in self.models]
|
|
# Module-level singletons shared by the Gradio callbacks below.
inverter = Inverter()
manager = EnsembleManager()
ensembless = EnsembLess()
|
|
class EnsembLess_ui_updates:
    """Thin adapters between Gradio events and the module-level singletons."""

    def update_model_dropdown(self, model_type):
        """Refresh the model-name dropdown for the chosen model family."""
        choices = ensembless.get_models_by_type(model_type)
        first = choices[0] if choices else None
        return gr.Dropdown(choices=choices, value=first)

    def update_stem_dropdown(self, model_type, model_name):
        """Refresh the primary-stem dropdown for the chosen model."""
        choices = ensembless.get_stems_by_model(model_type, model_name)
        first = choices[0] if choices else None
        return gr.Dropdown(choices=choices, value=first)

    def update_invert_stem_dropdown(self, model_type, model_name, primary_stem):
        """Refresh the inverted-stem dropdown for the chosen primary stem."""
        choices = ensembless.get_invert_stems_by_model(model_type, model_name, primary_stem)
        first = choices[0] if choices else None
        return gr.Dropdown(choices=choices, value=first)

    def add_model(self, model_type, model_name, p_stem, s_stem, weight):
        """Queue a model for the ensemble; returns the refreshed table."""
        return manager.add_model(model_type, model_name, p_stem, s_stem, weight)

    def remove_model(self, index):
        """Remove by 1-based UI index; negative indices leave the table as-is."""
        if index < 0:
            return manager.get_df()
        return manager.remove_model(index - 1)

    def clear_all_models(self):
        """Empty the queued-model table."""
        return manager.clear_models()

    def run_ensemble(self, input_audio, ensemble_type, output_format, invert_weights, invert_ensemble):
        """Validate the UI state, then delegate to EnsembLess.auto_ensemble()."""
        if not manager.models:
            raise gr.Error(t("error_no_models"))
        if not input_audio:
            raise gr.Error(t("error_no_audio"))

        return ensembless.auto_ensemble(
            input_audio=input_audio,
            input_settings=manager.get_settings(),
            type=ensemble_type,
            out_format=output_format,
            invert_weights=invert_weights,
            invert_ensemble=invert_ensemble,
        )
|
|
# Singleton adapter instance used by the Gradio event wiring.
ensembless_ui = EnsembLess_ui_updates()
|
|
def ensembless_plugin_name():
    """Display name under which this plugin is registered."""
    plugin_title = "EnsembLess"
    return plugin_title
|
|
| |
def ensembless_plugin(lang):
    """Build the EnsembLess Gradio UI (auto-ensemble + manual ensemble tabs) in *lang*."""
    set_language(lang)


    with gr.Tabs():
        with gr.Tab(t("auto_ensemble")):
            with gr.Row():
                with gr.Column(scale=1):
                    # Model picker: family -> model -> primary / inverted stem.
                    gr.Markdown(f"### {t('model_selection')}")
                    model_type = gr.Dropdown(
                        choices=ensembless.get_model_types(),
                        label=t("model_type"),
                        value=ensembless.get_model_types()[0] if ensembless.get_model_types() else None,
                        filterable=False
                    )
                    model_name = gr.Dropdown(
                        choices=ensembless.get_models_by_type(ensembless.get_model_types()[0]),
                        label=t("model_name"),
                        interactive=True,
                        value=ensembless.get_models_by_type(ensembless.get_model_types()[0])[0],
                        filterable=False
                    )
                    stem = gr.Dropdown(
                        choices=ensembless.get_stems_by_model(ensembless.get_model_types()[0], ensembless.get_models_by_type(ensembless.get_model_types()[0])[0]),
                        label=t("p_stem"),
                        interactive=True,
                        filterable=False
                    )
                    invert_stem = gr.Dropdown(
                        choices=ensembless.get_invert_stems_by_model(ensembless.get_model_types()[0], ensembless.get_models_by_type(ensembless.get_model_types()[0])[0], "vocals"),
                        label=t("s_stem"),
                        interactive=True,
                        filterable=False
                    )

                    weight = gr.Slider(
                        label=t("weight"),
                        value=1.0,
                        minimum=0.1,
                        maximum=10.0,
                        step=0.1
                    )
                    add_btn = gr.Button(t("add_button"), variant="primary")

                with gr.Column(scale=2):
                    # Table of models currently queued for the ensemble,
                    # with preset export/import and row removal controls.
                    gr.Markdown(f"### {t('current_ensemble')}")
                    ensemble_df = gr.Dataframe(
                        value=manager.get_df(),
                        headers=["#", t("model_type"), t("model_name"), t("p_stem"), t("s_stem"), t("weight")],
                        datatype=["str", "str", "str", "str", "str", "number"],
                        interactive=False
                    )
                    with gr.Row(equal_height=True):
                        export_preset_name = gr.Textbox(label=t("give_name_preset"), interactive=True, value="ensembless_preset")
                        with gr.Column():
                            export_btn = gr.DownloadButton(t("export"), variant="secondary")
                            import_btn = gr.UploadButton(t("import"), file_types=[".json"], file_count="single")
                    with gr.Row(equal_height=True):
                        remove_idx = gr.Number(
                            label=t("remove_index"),
                            precision=0,
                            minimum=1,
                            interactive=True
                        )
                        with gr.Column():
                            remove_btn = gr.Button(t("remove_button"), variant="stop")
                            clear_btn = gr.Button(t("clear_button"), variant="stop")


            with gr.Row():
                with gr.Column():
                    gr.Markdown(f"### {t('input_audio')}")
                    input_audio = gr.Audio(type="filepath", show_label=False)
                    # Hidden holder for the auto-resampled copy of the upload.
                    input_audio_resampled = gr.Text(visible=False)

                    gr.Markdown(f"### {t('settings')}")
                    ensemble_type = gr.Dropdown(
                        choices=['avg_wave', 'median_wave', 'min_wave', 'max_wave',
                                'avg_fft', 'median_fft', 'min_fft', 'max_fft'],
                        value='avg_fft',
                        label=t("method"),
                        filterable=False
                    )
                    invert_ensem = gr.Checkbox(label=t("invert_ensemble"))
                    invert_weights = gr.Checkbox(label=t("invert_weights"))
                    output_format = gr.Dropdown(
                        choices=["wav", "mp3", "flac", "m4a", "aac", "ogg", "opus", "aiff"],
                        value="mp3",
                        label=t("output_format"),
                        filterable=False
                    )
                    run_btn = gr.Button(t("run_button"), variant="primary")


                with gr.Column():
                    with gr.Tab(t('results')):
                        # Main result player plus hidden WAV path used as the
                        # input for the inversion step below.
                        with gr.Column():
                            output_audio = gr.Audio(label=t("results"), type="filepath", interactive=False, show_download_button=True)
                            output_wav = gr.Text(label="Результат в WAV", interactive=False, visible=False)

                            gr.Markdown(f"###### {t('inverted_result')}")

                            invert_method = gr.Radio(
                                choices=["waveform", "spectrogram"],
                                label=t("invert_method"),
                                value="waveform"
                            )
                            invert_btn = gr.Button(t("invert_button"))
                            inverted_output_audio = gr.Audio(label=t("inverted_result"), type="filepath", interactive=False, show_download_button=True)
                            inverted_wav = gr.Text(label="Инвертированный результат в WAV", interactive=False, visible=False)

                    with gr.Tab(t('result_source')):
                        result_source = gr.Files(interactive=False, label=t('result_source'))

            # --- Event wiring: dropdown cascades ---
            stem.change(ensembless_ui.update_invert_stem_dropdown, inputs=[model_type, model_name, stem], outputs=invert_stem)


            model_type.change(
                ensembless_ui.update_model_dropdown,
                inputs=model_type,
                outputs=model_name
            )
            model_name.change(
                ensembless_ui.update_stem_dropdown,
                inputs=[model_type, model_name],
                outputs=stem
            )


            # --- Preset export/import: any table or name change refreshes the
            # file behind the download button ---
            ensemble_df.change(
                manager.export_preset,
                inputs=export_preset_name,
                outputs=export_btn
            )

            export_preset_name.change(
                manager.export_preset,
                inputs=export_preset_name,
                outputs=export_btn
            )

            import_btn.upload(
                manager.import_preset,
                inputs=import_btn,
                outputs=ensemble_df
            )


            # --- Processing actions ---
            invert_btn.click(
                inverter.process_audio,
                inputs=[input_audio_resampled, output_wav, output_format, invert_method],
                outputs=[inverted_output_audio, inverted_wav]
            )

            # Uploads are resampled once up front; the resampled path feeds
            # both the ensemble run and the inversion.
            input_audio.upload(
                ensembless.resample_audio,
                inputs=input_audio,
                outputs=input_audio_resampled
            )

            add_btn.click(
                ensembless_ui.add_model,
                inputs=[model_type, model_name, stem, invert_stem, weight],
                outputs=ensemble_df
            )

            remove_btn.click(
                ensembless_ui.remove_model,
                inputs=remove_idx,
                outputs=ensemble_df
            )

            clear_btn.click(
                ensembless_ui.clear_all_models,
                outputs=ensemble_df
            )

            run_btn.click(
                ensembless_ui.run_ensemble,
                inputs=[input_audio_resampled, ensemble_type, output_format, invert_weights, invert_ensem],
                outputs=[output_audio, output_wav, inverted_output_audio, inverted_wav, result_source]
            )


        with gr.Tab(t("manual_ensemble")):
            with gr.Row(equal_height=True):
                input_files = gr.Files(show_label=False, type="filepath", file_types=[".wav", ".mp3", ".flac", ".m4a", ".aac", ".ogg", ".opus", ".aiff"])
                with gr.Column():
                    info_audios = gr.Textbox(label="", interactive=False)
                    man_method = gr.Dropdown(
                        choices=['avg_wave', 'median_wave', 'min_wave', 'max_wave',
                                'avg_fft', 'median_fft', 'min_fft', 'max_fft'],
                        value='avg_fft',
                        label=t("method"),
                        filterable=False
                    )

                    weights_input = gr.Textbox(label=t("weights_input"), value="1.0,1.0")

                    output_man_format = gr.Dropdown(
                        choices=["wav", "mp3", "flac", "m4a", "aac", "ogg", "opus", "aiff"],
                        value="mp3",
                        label=t("output_format"),
                        filterable=False
                    )


            run_man_btn = gr.Button(t("run_button"), variant="primary")

            output_man_audio = gr.Audio(label=t("results"), type="filepath", interactive=False, show_download_button=True)
            output_man_wav = gr.Text(label="Результат в WAV", interactive=False, visible=False)

            # Show a sample-rate report as soon as files are uploaded.
            input_files.upload(
                fn=ensembless.analyze_sample_rate,
                inputs=input_files,
                outputs=info_audios
            )

            run_man_btn.click(
                ensembless.manual_ensemble,
                inputs=[input_files, man_method, weights_input, output_man_format],
                outputs=[output_man_audio, output_man_wav]
            )