| import os |
| import sys |
| from dotenv import load_dotenv |
| import requests |
| import wave |
| import zipfile |
| now_dir = os.getcwd() |
| sys.path.append(now_dir) |
| load_dotenv() |
| from infer.modules.vc.modules import VC |
| from infer.modules.uvr5.modules import UVRHANDLER |
| from infer.lib.train.process_ckpt import ( |
| change_info, |
| extract_small_model, |
| merge, |
| show_info, |
| ) |
| from i18n.i18n import I18nAuto |
| from configs.config import Config |
| from sklearn.cluster import MiniBatchKMeans |
| import torch |
| import numpy as np |
| import gradio as gr |
| import faiss |
| import fairseq |
| import librosa |
| import librosa.display |
| import pathlib |
| import json |
| from pydub import AudioSegment |
| from time import sleep |
| from subprocess import Popen |
| from random import shuffle |
| import warnings |
| import traceback |
| import threading |
| import shutil |
| import logging |
| import matplotlib.pyplot as plt |
| import soundfile as sf |
| from dotenv import load_dotenv |
| from tools import pretrain_helper |
|
|
| import edge_tts, asyncio |
| from infer.modules.vc.ilariatts import tts_order_voice |
# Voice table for the TTS tab; its keys are the labels offered in the voice dropdown.
language_dict = tts_order_voice
ilariavoices = list(language_dict.keys())

# Make the working directory importable and load settings from the .env file.
now_dir = os.getcwd()
sys.path.append(now_dir)
load_dotenv()

# Only show numba log records at WARNING level or above.
logging.getLogger("numba").setLevel(logging.WARNING)
logger = logging.getLogger(__name__)

# Start from an empty scratch directory and make sure the working folders exist.
tmp = os.path.join(now_dir, "TEMP")
shutil.rmtree(tmp, ignore_errors=True)
shutil.rmtree("%s/runtime/Lib/site-packages/infer_pack" % now_dir, ignore_errors=True)
os.makedirs(tmp, exist_ok=True)
os.makedirs(os.path.join(now_dir, "logs"), exist_ok=True)
os.makedirs(os.path.join(now_dir, "models/pth"), exist_ok=True)
os.environ["TEMP"] = tmp
warnings.filterwarnings("ignore")
torch.manual_seed(114514)

# Global configuration and the voice-conversion engine built from it.
config = Config()
vc = VC(config)

# Model locations come from the environment (populated by load_dotenv above).
weight_root = os.getenv("weight_root")
weight_uvr5_root = os.getenv("weight_uvr5_root")
index_root = os.getenv("index_root")

# Voice weights: every *.pth file directly inside weight_root.
names = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]

# Index files anywhere below index_root, skipping the intermediate "trained" ones.
index_paths = [
    "%s/%s" % (folder, entry)
    for folder, _subdirs, entries in os.walk(index_root, topdown=False)
    for entry in entries
    if entry.endswith(".index") and "trained" not in entry
]

uvr5_names = [
    '5_HP-Karaoke-UVR.pth',
    'Kim_Vocal_2.onnx',
    'MDX23C-8KFFT-InstVoc_HQ_2.ckpt',
    'UVR-DeEcho-DeReverb.pth',
    'UVR-Denoise',
]
if config.dml:

    def forward_dml(ctx, x, scale):
        """Store `scale` on `ctx` and return a detached copy of `x`."""
        ctx.scale = scale
        return x.clone().detach()

    # Only when config.dml is set: replace fairseq's GradMultiply forward with the variant above.
    fairseq.modules.grad_multiply.GradMultiply.forward = forward_dml
i18n = I18nAuto()
logger.info(i18n)

# Probe the CUDA devices and keep those whose name contains one of the accepted
# model markers. `gpu_infos` holds "index<TAB>name" strings, `mem` the matching
# memory sizes in whole GiB (rounded with a +0.4 bias before truncation).
ngpu = torch.cuda.device_count()
gpu_infos = []
mem = []
if_gpu_ok = False

if torch.cuda.is_available() or ngpu != 0:
    for i in range(ngpu):
        gpu_name = torch.cuda.get_device_name(i)
        if any(
            value in gpu_name.upper()
            for value in [
                "10",
                "16",
                "20",
                "30",
                "40",
                "A2",
                "A3",
                "A4",
                "P4",
                "A50",
                "500",
                "A60",
                "70",
                "80",
                "90",
                "M4",
                "T4",
                "TITAN",
            ]
        ):
            if_gpu_ok = True
            gpu_infos.append("%s\t%s" % (i, gpu_name))
            mem.append(
                int(
                    torch.cuda.get_device_properties(i).total_memory
                    / 1024
                    / 1024
                    / 1024
                    + 0.4
                )
            )
if if_gpu_ok and len(gpu_infos) > 0:
    gpu_info = "\n".join(gpu_infos)
    # Half of the smallest card's memory, rounded to an even number.
    default_batch_size = ((min(mem) // 2 + 1) // 2) * 2
else:
    gpu_info = i18n("Your GPU doesn't work for training")
    default_batch_size = 1
# Use the whole index field in front of the tab; taking only the first
# character would truncate device indices of 10 and above.
gpus = "-".join([i.split("\t")[0] for i in gpu_infos])
|
|
|
|
class ToolButton(gr.Button, gr.components.FormComponent):
    """Button that is always constructed with ``variant="tool"``."""

    def __init__(self, **kwargs):
        # Remaining keyword arguments are forwarded untouched to the parent constructor.
        super().__init__(variant="tool", **kwargs)

    def get_block_name(self):
        """Report the block name as a plain "button"."""
        return "button"
|
|
# Roots to scan: two come from the environment, the audio folder is fixed.
weight_root = os.getenv("weight_root")
index_root = os.getenv("index_root")
audio_root = "audios"
sup_audioext = {
    'wav', 'mp3', 'flac', 'ogg', 'opus',
    'm4a', 'mp4', 'aac', 'alac', 'wma',
    'aiff', 'webm', 'ac3',
}

# Full paths of every .pth / .onnx file anywhere below weight_root.
names = [
    os.path.join(folder, entry)
    for folder, _, entries in os.walk(weight_root)
    for entry in entries
    if entry.endswith((".pth", ".onnx"))
]

# Full paths of every .index file below index_root whose name lacks "trained".
indexes_list = [
    os.path.join(folder, entry)
    for folder, _, entries in os.walk(index_root, topdown=False)
    for entry in entries
    if entry.endswith(".index") and "trained" not in entry
]

# Full paths of every file below audio_root whose name ends with a supported extension.
audio_paths = [
    os.path.join(folder, entry)
    for folder, _, entries in os.walk(audio_root, topdown=False)
    for entry in entries
    if entry.endswith(tuple(sup_audioext))
]
def get_pretrained_files(directory, keyword, filter_str):
    """Map matching checkpoint names in `directory` to their paths.

    A file matches when its name ends with ".pth" and contains both
    `keyword` and `filter_str`.
    """
    return {
        entry: os.path.join(directory, entry)
        for entry in os.listdir(directory)
        if entry.endswith(".pth") and keyword in entry and filter_str in entry
    }
|
|
# Folder holding the pretrained checkpoints.
pretrained_directory = "assets/pretrained_v2"

# Every file in that folder, keyed by file name.
pretrained_path = dict(
    (entry, os.path.join(pretrained_directory, entry))
    for entry in os.listdir(pretrained_directory)
)

# The .pth files whose names contain "f0" plus "G" or "D" respectively.
pretrained_G_files = get_pretrained_files(pretrained_directory, "G", "f0")
pretrained_D_files = get_pretrained_files(pretrained_directory, "D", "f0")
|
|
def get_pretrained_models(path_str, f0_str, sr2):
    """Return the (generator, discriminator) pretrained paths for a sample rate.

    The generator file name is looked up in the mapping returned by
    ``pretrain_helper.get_pretrained_models(f0_str)``; the discriminator name
    is derived from it by replacing every "G" with "D".

    Args:
        path_str: Unused; kept so existing callers keep working.
        f0_str: Passed through to ``pretrain_helper.get_pretrained_models``.
        sr2: Sample-rate key to look up in the mapping.

    Returns:
        A 2-tuple of paths inside ``pretrained_directory``, or two empty
        strings when the mapping has no entry for `sr2`.
    """
    sr_mapping = pretrain_helper.get_pretrained_models(f0_str)
    pretrained_G_filename = sr_mapping.get(sr2, "")

    if not pretrained_G_filename:
        logging.warning(f"Pretrained models not found for sample rate {sr2}, will not use pretrained models")
        # Return genuinely empty values: joining "" onto the directory yields a
        # non-empty path, which callers comparing against "" would treat as a model.
        return "", ""

    pretrained_D_filename = pretrained_G_filename.replace("G", "D")
    return (
        os.path.join(pretrained_directory, pretrained_G_filename),
        os.path.join(pretrained_directory, pretrained_D_filename),
    )
|
|
# Rebuild the model lists used by the dropdowns: plain file names of the
# *.pth files directly inside weight_root ...
names = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]

# ... and "folder/file" strings for every non-"trained" .index file below index_root.
index_paths = [
    "%s/%s" % (folder, entry)
    for folder, _subdirs, entries in os.walk(index_root, topdown=False)
    for entry in entries
    if entry.endswith(".index") and "trained" not in entry
]
|
|
def generate_spectrogram_and_get_info(audio_file):
    """Render a mel spectrogram of `audio_file` and summarise its properties.

    The image is written to ``spectrogram.png`` in the current directory.

    Args:
        audio_file: Path of the audio file to analyse.

    Returns:
        A tuple ``(info_table, "spectrogram.png")`` where `info_table` is a
        Markdown table with name, duration, bitrate, channels and sample rate.
    """
    y, sr = librosa.load(audio_file, sr=None)

    # NOTE(review): melspectrogram is fed to amplitude_to_db here; confirm whether
    # power_to_db was intended. Left unchanged to keep the rendered image the same.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=256)
    log_S = librosa.amplitude_to_db(S, ref=np.max, top_db=256)

    fig = plt.figure(figsize=(12, 5.5))
    try:
        librosa.display.specshow(log_S, sr=sr, x_axis='time')
        plt.colorbar(format='%+2.0f dB', pad=0.01)
        plt.tight_layout(pad=0.5)
        plt.savefig('spectrogram.png', dpi=500)
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each call would leave another figure in memory.
        plt.close(fig)

    audio_info = sf.info(audio_file)
    # Bits per sample by soundfile subtype; unknown subtypes still map to 0.
    bit_depth = {
        'PCM_S8': 8,
        'PCM_U8': 8,
        'PCM_16': 16,
        'PCM_24': 24,
        'PCM_32': 32,
        'FLOAT': 32,
        'DOUBLE': 64,
    }.get(audio_info.subtype, 0)
    minutes, seconds = divmod(audio_info.duration, 60)
    seconds, milliseconds = divmod(seconds, 1)
    milliseconds *= 1000
    speed_in_kbps = audio_info.samplerate * bit_depth / 1000
    filename_without_extension, _ = os.path.splitext(os.path.basename(audio_file))

    info_table = f"""
| Information | Value |
| :---: | :---: |
| File Name | {filename_without_extension} |
| Duration | {int(minutes)} minutes - {int(seconds)} seconds - {int(milliseconds)} milliseconds |
| Bitrate | {speed_in_kbps} kbp/s |
| Audio Channels | {audio_info.channels} |
| Samples per second | {audio_info.samplerate} Hz |
| Bit per second | {audio_info.samplerate * audio_info.channels * bit_depth} bit/s |
"""

    return info_table, "spectrogram.png"
|
|
|
|
def change_choices():
    """Rescan the model, index and audio folders for the refresh button.

    Returns:
        Three update dicts whose "choices" are the sorted voice weights,
        index files and audio files respectively.
    """
    voice_files = [entry for entry in os.listdir(weight_root) if entry.endswith(".pth")]
    index_files = [
        "%s/%s" % (folder, entry)
        for folder, _subdirs, entries in os.walk(index_root, topdown=False)
        for entry in entries
        if entry.endswith(".index") and "trained" not in entry
    ]
    # Listed from <now_dir>/audios but reported relative to audio_root.
    audio_files = [
        os.path.join(audio_root, entry)
        for entry in os.listdir(os.path.join(now_dir, "audios"))
    ]

    voice_update = {"choices": sorted(voice_files), "__type__": "update"}
    index_update = {"choices": sorted(index_files), "__type__": "update"}
    audio_update = {"choices": sorted(audio_files), "__type__": "update"}
    return voice_update, index_update, audio_update
|
|
|
|
|
|
| |
def tts_and_convert(ttsvoice, text, spk_item, vc_transform, f0_file, f0method, file_index1, file_index2, index_rate, filter_radius, resample_sr, rms_mix_rate, protect):
    """Synthesise `text` with edge-tts, then convert the result with `vc.vc_single`.

    Args:
        ttsvoice: Key into `language_dict` selecting the edge-tts voice.
        text: Text to speak.
        spk_item .. protect: Forwarded unchanged to `vc.vc_single`.

    Returns:
        Whatever `vc.vc_single` returns for the synthesised file.

    Raises:
        KeyError: If `ttsvoice` is not a key of `language_dict`.
    """
    vo = language_dict[ttsvoice]
    aud_path = './TEMP/temp_ilariatts.mp3'

    # The scratch directory may have been removed since start-up; recreate it
    # so the save below cannot fail on a missing folder.
    os.makedirs(os.path.dirname(aud_path), exist_ok=True)
    asyncio.run(edge_tts.Communicate(text, vo).save(aud_path))

    # The former `vc_output1.update(...)` call was dropped: its return value was
    # discarded, so it had no effect on the interface.
    return vc.vc_single(spk_item, None, aud_path, vc_transform, f0_file, f0method, file_index1, file_index2, index_rate, filter_radius, resample_sr, rms_mix_rate, protect)
|
|
|
|
def import_files(file):
    """Import an uploaded model file or a zip archive of model files.

    ``.pth`` files go to ``./models/pth/`` and ``.index`` files to
    ``./models/index/``. A file whose destination already exists is skipped
    with a printed notice.

    Args:
        file: Upload object exposing the path as ``file.name``, or None.

    Returns:
        A status message describing what was done.
    """
    import tempfile  # local import: only this function needs it

    if file is None:
        return "No file has been uploaded."

    upload_path = file.name
    pth_dir = './models/pth/'
    index_dir = './models/index/'

    def _store(source, dest_dir):
        """Move `source` into `dest_dir` unless a file of that name is already there."""
        destination = dest_dir + os.path.basename(source)
        if os.path.exists(destination):
            print(f"File {destination} already exists. Skipping.")
            return
        os.makedirs(dest_dir, exist_ok=True)
        # shutil.move also works when source and destination are on different filesystems.
        shutil.move(source, destination)

    if upload_path.endswith('.zip'):
        # Extract into a private directory: unpacking into, and then deleting,
        # the shared ./TEMP folder would remove files other handlers rely on.
        with tempfile.TemporaryDirectory() as extract_dir:
            with zipfile.ZipFile(upload_path, 'r') as zip_ref:
                zip_ref.extractall(extract_dir)
            for root, _dirs, members in os.walk(extract_dir):
                for member in members:
                    if member.endswith('.pth'):
                        _store(os.path.join(root, member), pth_dir)
                    elif member.endswith('.index'):
                        _store(os.path.join(root, member), index_dir)
        return "Zip file has been successfully extracted."

    if upload_path.endswith('.pth'):
        _store(upload_path, pth_dir)
        return "PTH file has been successfully imported."

    if upload_path.endswith('.index'):
        _store(upload_path, index_dir)
        return "Index file has been successfully imported."

    return "Unsupported file type."
|
|
def import_button_click(file):
    """Click handler: hand the upload to `import_files` and return its status message."""
    status = import_files(file)
    return status
|
|
def calculate_remaining_time(epochs, seconds_per_epoch):
    """Format the total time for `epochs` epochs as hours and minutes.

    Returns:
        "<m> minutes" below one hour, otherwise "<h> hour(s) and <m> minutes".
    """
    total_seconds = epochs * seconds_per_epoch
    hours, leftover = divmod(total_seconds, 3600)
    minutes = leftover // 60

    if hours == 0:
        return f"{int(minutes)} minutes"
    if hours == 1:
        return f"{int(hours)} hour and {int(minutes)} minutes"
    return f"{int(hours)} hours and {int(minutes)} minutes"
|
|
def get_audio_duration(audio_file_path):
    """Return the duration of the audio file in minutes, as reported by soundfile."""
    return sf.info(audio_file_path).duration / 60
| |
def clean():
    """Return an update dict that resets the target component's value to an empty string."""
    return dict(value="", __type__="update")
|
|
|
|
# Sample rate in Hz for every sample-rate / pretrain label.
sr_dict = {
    "32k": 32000,
    "40k": 40000,
    "48k": 48000,
    "OV2-32k": 32000,
    "OV2-40k": 40000,
    "RIN-40k": 40000,
    "Snowie-40k": 40000,
    "Snowie-48k": 48000,
    "SnowieV3.1-40k": 40000,
    "SnowieV3.1-32k": 32000,
    "SnowieV3.1-48k": 48000,
    "SnowieV3.1-RinE3-40K": 40000,
    "Italia-32k": 32000,
}
|
|
def durations(sample_rate, model_options, qualities, duration):
    """Pick an entry by duration first, then by sample rate.

    Returns ``qualities['short']`` when `duration` is at most 350; otherwise
    the `model_options` entry for 32000/40000/48000 Hz, falling back to
    ``qualities['other']`` for any other rate.
    """
    if duration <= 350:
        return qualities['short']

    for rate, label in ((32000, '32k'), (40000, '40k'), (48000, '48k')):
        if sample_rate == rate:
            return model_options[label]
    return qualities['other']
|
|
def get_training_info(audio_file):
    """Suggest a pretrain and epoch count from the length of `audio_file`.

    Args:
        audio_file: Path of a WAV file (the sample rate is read with `wave`),
            or None.

    Returns:
        A Markdown recommendation, or an explanatory message when no file was
        given or the duration is 60 minutes or more.
    """
    if audio_file is None:
        return 'Please provide an audio file!'
    duration = get_audio_duration(audio_file)  # minutes
    # Use a context manager so the file handle is closed again.
    with wave.open(audio_file, 'rb') as wav_file:
        sample_rate = wav_file.getframerate()

    # (min minutes, max minutes) -> (epochs, pretrain); upper bounds are exclusive.
    training_info = {
        (0, 2): (150, 'OV2'),
        (2, 3): (200, 'OV2'),
        (3, 5): (250, 'OV2'),
        (5, 10): (300, 'Normal'),
        (10, 25): (500, 'Normal'),
        (25, 45): (700, 'Normal'),
        (45, 60): (1000, 'Normal')
    }

    for (min_duration, max_duration), (epochs, pretrain) in training_info.items():
        if min_duration <= duration < max_duration:
            return f'You should use the **{pretrain}** pretrain with **{epochs}** epochs at **{sample_rate/1000}khz** sample rate.'

    return 'Duration is not within the specified range!'
|
|
|
|
def if_done(done, p):
    """Block until process `p` has exited, then set ``done[0]`` to True.

    `p` is polled every half second; `done` is a one-element list used as a
    shared flag.
    """
    while p.poll() is None:
        sleep(0.5)
    done[0] = True
|
|
def on_button_click(audio_file_path):
    """Click handler: return the training recommendation for the given audio file."""
    recommendation = get_training_info(audio_file_path)
    return recommendation
|
|
def download_from_url(url, model):
    """Download a zipped voice model and install it under ``./models``.

    The archive is fetched into ``./zips`` and unpacked into ``./unzips``.
    Every ``.index`` file is copied to ``./models/index/<model>.index`` and
    every ``.pth`` file whose name contains neither "G_" nor "D_" to
    ``./models/pth/<model>.pth``. Both work directories are removed afterwards,
    also when the download fails.

    Args:
        url: Link to the archive (Google Drive, mega.nz or a direct URL).
        model: Name to store the model under.

    Returns:
        A status message; failures are reported as "ERROR - ..." strings
        instead of being raised.
    """
    # Local import: the file only imports Popen from subprocess, so the module
    # name itself is not available at file level.
    import subprocess

    url = (url or '').strip()
    if url == '':
        return "URL cannot be left empty."
    if not model:
        return "You need to name your model. For example: Ilaria"

    # Start from empty work directories.
    for directory in ("zips", "unzips"):
        if os.path.exists(directory):
            shutil.rmtree(directory)
        os.makedirs(directory, exist_ok=True)

    archive_name = model + '.zip'
    zipfile_path = './zips/' + archive_name

    try:
        if "drive.google.com" in url:
            # check=True makes a failing gdown raise CalledProcessError.
            subprocess.run(["gdown", url, "--fuzzy", "-O", zipfile_path], check=True)
        elif "mega.nz" in url:
            # The Mega client is not imported anywhere in the visible part of
            # this file; use it only if some other part of the module provides it.
            mega_cls = globals().get("Mega")
            if mega_cls is None:
                return "ERROR - Download failed (mega): the Mega client is not available."
            mega_cls().download_url(url, './zips')
            if not os.path.exists(zipfile_path):
                # The client chooses the file name itself; pick up whatever arrived.
                fetched = sorted(os.listdir('./zips'))
                if fetched:
                    zipfile_path = os.path.join('./zips', fetched[0])
        else:
            # Stream to disk so large archives are not held in memory.
            with requests.get(url, stream=True, timeout=60) as response:
                response.raise_for_status()
                with open(zipfile_path, 'wb') as file:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        file.write(chunk)

        shutil.unpack_archive(zipfile_path, "./unzips", 'zip')

        for root, dirs, files in os.walk('./unzips'):
            for file in files:
                file_path = os.path.join(root, file)
                if file.endswith(".index"):
                    os.makedirs(f'./models/index', exist_ok=True)
                    shutil.copy2(file_path, f'./models/index/{model}.index')
                elif "G_" not in file and "D_" not in file and file.endswith(".pth"):
                    os.makedirs(f'./models/pth', exist_ok=True)
                    shutil.copy(file_path, f'./models/pth/{model}.pth')

        return "Model downloaded, you can go back to the inference page!"

    except subprocess.CalledProcessError as e:
        return f"ERROR - Download failed (gdown): {str(e)}"
    except requests.exceptions.RequestException as e:
        return f"ERROR - Download failed (requests): {str(e)}"
    except Exception as e:
        return f"ERROR - The test failed: {str(e)}"
    finally:
        shutil.rmtree("zips", ignore_errors=True)
        shutil.rmtree("unzips", ignore_errors=True)
|
|
def transfer_files(filething, dataset_dir='dataset/'):
    """Copy uploaded files into the dataset directory.

    Args:
        filething: Iterable of upload objects exposing their path as ``.name``.
        dataset_dir: Destination folder; created when it does not exist yet.

    Returns:
        A confirmation message.
    """
    # Without this the first copy fails when the folder is missing.
    os.makedirs(dataset_dir, exist_ok=True)
    for upload in filething:
        source = upload.name
        destination = os.path.join(dataset_dir, os.path.basename(source))
        shutil.copyfile(source, destination)
    return "Transferred files to dataset directory!"
|
|
def if_done_multi(done, ps):
    """Block until every process in `ps` has exited, then set ``done[0]`` to True.

    Each round stops at the first process that is still running and sleeps
    half a second before polling again.
    """
    while any(proc.poll() is None for proc in ps):
        sleep(0.5)
    done[0] = True
|
|
|
|
def preprocess_dataset(trainset_dir, exp_dir, sr, n_p):
    """Run the preprocessing script and stream its log.

    Generator: yields the current content of ``preprocess.log`` about once a
    second while the child process runs, then the final content once more.

    Args:
        trainset_dir: Dataset folder passed to the script.
        exp_dir: Experiment name; logs go to ``<now_dir>/logs/<exp_dir>``.
        sr: Key of `sr_dict` selecting the sample rate.
        n_p: Passed through to the script.
    """
    sr = sr_dict[sr]
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    log_path = "%s/logs/%s/preprocess.log" % (now_dir, exp_dir)
    # Create / truncate the log so the reads below always find a file.
    with open(log_path, "w"):
        pass

    per = 3.0 if config.is_half else 3.7
    # NOTE(review): the arguments are interpolated into a shell command line;
    # confirm that trainset_dir / exp_dir can never carry shell metacharacters.
    cmd = '"%s" infer/modules/train/preprocess.py "%s" %s %s "%s/logs/%s" %s %.1f' % (
        config.python_cmd,
        trainset_dir,
        sr,
        n_p,
        now_dir,
        exp_dir,
        config.noparallel,
        per,
    )
    logger.info(cmd)
    proc = Popen(cmd, shell=True)

    # A watcher thread flips done[0] once the child has exited.
    done = [False]
    threading.Thread(target=if_done, args=(done, proc)).start()

    while True:
        with open(log_path, "r") as log_file:
            yield log_file.read()
        sleep(1)
        if done[0]:
            break

    with open(log_path, "r") as log_file:
        log = log_file.read()
    logger.info(log)
    yield log
|
|
|
|
def extract_f0_feature(gpus, n_p, f0method, if_f0, exp_dir, version19, gpus_rmvpe):
    """Run pitch extraction (optional) and feature extraction, streaming the log.

    Generator: while each stage runs it yields the current content of
    ``extract_f0_feature.log`` about once a second, followed by the final
    content of that stage.

    Args:
        gpus: "-"-separated device list for the feature-extraction stage.
        n_p: Passed through to the pitch script.
        f0method: Pitch method; "rmvpe_gpu" selects the rmvpe scripts.
        if_f0: When false the pitch stage is skipped.
        exp_dir: Experiment name; logs go to ``<now_dir>/logs/<exp_dir>``.
        version19: Passed through to the feature script.
        gpus_rmvpe: "-"-separated device list for rmvpe, or "-" for the dml script.
    """
    gpus = gpus.split("-")
    os.makedirs("%s/logs/%s" % (now_dir, exp_dir), exist_ok=True)
    log_path = "%s/logs/%s/extract_f0_feature.log" % (now_dir, exp_dir)
    # Create / truncate the log so the reads below always find a file.
    with open(log_path, "w"):
        pass

    def _launch(command):
        # Log the command line, then start it in the project directory.
        logger.info(command)
        return Popen(command, shell=True, cwd=now_dir)

    def _watch(waiter, payload):
        # Start a thread that sets flag[0] once `payload` has finished.
        flag = [False]
        threading.Thread(target=waiter, args=(flag, payload)).start()
        return flag

    def _stream_log(flag):
        # Yield the log about once a second until flag[0] is set, then one final time.
        while True:
            with open(log_path, "r") as log_file:
                yield log_file.read()
            sleep(1)
            if flag[0]:
                break
        with open(log_path, "r") as log_file:
            final_text = log_file.read()
        logger.info(final_text)
        yield final_text

    if if_f0:
        if f0method != "rmvpe_gpu":
            proc = _launch(
                '"%s" infer/modules/train/extract/extract_f0_print.py "%s/logs/%s" %s %s'
                % (
                    config.python_cmd,
                    now_dir,
                    exp_dir,
                    n_p,
                    f0method,
                )
            )
            done = _watch(if_done, proc)
        elif gpus_rmvpe != "-":
            # One rmvpe worker per listed device.
            gpus_rmvpe = gpus_rmvpe.split("-")
            leng = len(gpus_rmvpe)
            ps = [
                _launch(
                    '"%s" infer/modules/train/extract/extract_f0_rmvpe.py %s %s %s "%s/logs/%s" %s '
                    % (
                        config.python_cmd,
                        leng,
                        idx,
                        n_g,
                        now_dir,
                        exp_dir,
                        config.is_half,
                    )
                )
                for idx, n_g in enumerate(gpus_rmvpe)
            ]
            done = _watch(if_done_multi, ps)
        else:
            # The dml variant is run to completion before any log is streamed.
            proc = _launch(
                config.python_cmd
                + ' infer/modules/train/extract/extract_f0_rmvpe_dml.py "%s/logs/%s" '
                % (
                    now_dir,
                    exp_dir,
                )
            )
            proc.wait()
            done = [True]
        yield from _stream_log(done)

    # Feature extraction: one worker per listed device.
    leng = len(gpus)
    ps = [
        _launch(
            '"%s" infer/modules/train/extract_feature_print.py %s %s %s %s "%s/logs/%s" %s'
            % (
                config.python_cmd,
                config.device,
                leng,
                idx,
                n_g,
                now_dir,
                exp_dir,
                version19,
            )
        )
        for idx, n_g in enumerate(gpus)
    ]
    done = _watch(if_done_multi, ps)
    yield from _stream_log(done)
|
|
|
|
|
|
def change_sr2(sr2, if_f0_3, version19):
    """Return the pretrained (G, D) paths matching the newly selected sample rate."""
    if version19 == "v1":
        path_str = ""
    else:
        path_str = "_v2"
    if if_f0_3:
        f0_str = "f0"
    else:
        f0_str = ""
    return get_pretrained_models(path_str, f0_str, sr2)
|
|
|
|
def change_version19(sr2, if_f0_3, version19):
    """React to a model-version change.

    Returns:
        The pretrained (G, D) paths followed by an update dict that resets
        the sample-rate choices for the selected version.
    """
    is_v1 = version19 == "v1"
    path_str = "" if is_v1 else "_v2"
    # For v1 a "32k" selection is replaced by "40k".
    if sr2 == "32k" and is_v1:
        sr2 = "40k"

    if is_v1:
        sr_choices = ["32k","40k", "48k"]
    else:
        sr_choices = ["32k", "40k", "48k", "OV2-32k", "OV2-40k", "RIN-40k","Snowie-40k","Snowie-48k","Italia-32k"]
    to_return_sr2 = {"choices": sr_choices, "__type__": "update", "value": sr2}

    f0_str = "f0" if if_f0_3 else ""
    return (
        *get_pretrained_models(path_str, f0_str, sr2),
        to_return_sr2,
    )
|
|
def change_f0(if_f0_3, sr2, version19):
    """React to the pitch-guidance toggle.

    Returns:
        Two visibility update dicts followed by the pretrained (G, D) paths.
    """
    path_str = "" if version19 == "v1" else "_v2"
    first_update = {"visible": if_f0_3, "__type__": "update"}
    second_update = {"visible": if_f0_3, "__type__": "update"}
    # Only a literal True selects the "f0" pretrained files.
    f0_str = "f0" if if_f0_3 is True else ""
    return (
        first_update,
        second_update,
        *get_pretrained_models(path_str, f0_str, sr2),
    )
|
|
def click_train(
    exp_dir1,
    sr2,
    if_f0_3,
    spk_id5,
    save_epoch10,
    total_epoch11,
    batch_size12,
    if_save_latest13,
    pretrained_G14,
    pretrained_D15,
    gpus16,
    if_cache_gpu17,
    if_save_every_weights18,
    version19,
):
    """Write the training file list and config, then run the training script.

    Builds ``filelist.txt`` from the samples present in every required feature
    folder of the experiment (plus two "mute" entries), writes ``config.json``
    if it does not exist yet, launches ``train.py`` and waits for it to exit.

    Returns:
        A fixed hint pointing at the console / train.log.
    """
    global f0_dir, f0nsf_dir
    exp_dir = "%s/logs/%s" % (now_dir, exp_dir1)
    os.makedirs(exp_dir, exist_ok=True)
    gt_wavs_dir = "%s/0_gt_wavs" % exp_dir
    if version19 == "v1":
        feature_dir = "%s/3_feature256" % exp_dir
    else:
        feature_dir = "%s/3_feature768" % exp_dir

    def _stems(folder):
        # File names in `folder` cut at their first dot.
        return set([entry.split(".")[0] for entry in os.listdir(folder)])

    if if_f0_3:
        f0_dir = "%s/2a_f0" % exp_dir
        f0nsf_dir = "%s/2b-f0nsf" % exp_dir

    # Keep only samples that exist in every folder the training needs.
    names = _stems(gt_wavs_dir) & _stems(feature_dir)
    if if_f0_3:
        names = names & _stems(f0_dir) & _stems(f0nsf_dir)

    gt_escaped = gt_wavs_dir.replace("\\", "\\\\")
    feature_escaped = feature_dir.replace("\\", "\\\\")
    if if_f0_3:
        f0_escaped = f0_dir.replace("\\", "\\\\")
        f0nsf_escaped = f0nsf_dir.replace("\\", "\\\\")
        opt = [
            "%s/%s.wav|%s/%s.npy|%s/%s.wav.npy|%s/%s.wav.npy|%s"
            % (
                gt_escaped,
                name,
                feature_escaped,
                name,
                f0_escaped,
                name,
                f0nsf_escaped,
                name,
                spk_id5,
            )
            for name in names
        ]
    else:
        opt = [
            "%s/%s.wav|%s/%s.npy|%s"
            % (
                gt_escaped,
                name,
                feature_escaped,
                name,
                spk_id5,
            )
            for name in names
        ]

    # Two "mute" entries are always appended.
    fea_dim = 256 if version19 == "v1" else 768
    if if_f0_3:
        mute_line = (
            "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s/logs/mute/2a_f0/mute.wav.npy"
            "|%s/logs/mute/2b-f0nsf/mute.wav.npy|%s"
            % (now_dir, sr2, now_dir, fea_dim, now_dir, now_dir, spk_id5)
        )
    else:
        mute_line = (
            "%s/logs/mute/0_gt_wavs/mute%s.wav|%s/logs/mute/3_feature%s/mute.npy|%s"
            % (now_dir, sr2, now_dir, fea_dim, spk_id5)
        )
    opt.extend([mute_line, mute_line])

    shuffle(opt)
    with open("%s/filelist.txt" % exp_dir, "w") as filelist:
        filelist.write("\n".join(opt))
    logger.debug("Write filelist done")
    logger.info("Use gpus: %s", str(gpus16))
    if pretrained_G14 == "":
        logger.info("No pretrained Generator")
    if pretrained_D15 == "":
        logger.info("No pretrained Discriminator")

    # v1 models and every 40k run use the v1 config template.
    if version19 == "v1" or sr2 == "40k":
        config_path = "v1/%s.json" % sr2
    else:
        config_path = "v2/%s.json" % sr2
    config_save_path = os.path.join(exp_dir, "config.json")
    if not pathlib.Path(config_save_path).exists():
        with open(config_save_path, "w", encoding="utf-8") as config_file:
            json.dump(
                config.json_config[config_path],
                config_file,
                ensure_ascii=False,
                indent=4,
                sort_keys=True,
            )
            config_file.write("\n")

    # Pieces shared by both command-line variants.
    f0_flag = 1 if if_f0_3 else 0
    pretrained_g_arg = "-pg %s" % pretrained_G14 if pretrained_G14 != "" else ""
    pretrained_d_arg = "-pd %s" % pretrained_D15 if pretrained_D15 != "" else ""
    save_latest_flag = 1 if if_save_latest13 == i18n("是") else 0
    cache_gpu_flag = 1 if if_cache_gpu17 == i18n("是") else 0
    save_every_flag = 1 if if_save_every_weights18 == i18n("是") else 0

    if gpus16:
        cmd = (
            '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -g %s -te %s -se %s %s %s -l %s -c %s '
            "-sw %s -v %s"
            % (
                config.python_cmd,
                exp_dir1,
                sr2,
                f0_flag,
                batch_size12,
                gpus16,
                total_epoch11,
                save_epoch10,
                pretrained_g_arg,
                pretrained_d_arg,
                save_latest_flag,
                cache_gpu_flag,
                save_every_flag,
                version19,
            )
        )
    else:
        # Same command without the -g device list.
        cmd = (
            '"%s" infer/modules/train/train.py -e "%s" -sr %s -f0 %s -bs %s -te %s -se %s %s %s -l %s -c %s -sw '
            "%s -v %s"
            % (
                config.python_cmd,
                exp_dir1,
                sr2,
                f0_flag,
                batch_size12,
                total_epoch11,
                save_epoch10,
                pretrained_g_arg,
                pretrained_d_arg,
                save_latest_flag,
                cache_gpu_flag,
                save_every_flag,
                version19,
            )
        )
    logger.info(cmd)
    trainer = Popen(cmd, shell=True, cwd=now_dir)
    trainer.wait()
    return "You can view console or train.log"
|
|
def train_index(exp_dir1, version19):
    """Build the faiss index for an experiment, yielding progress text.

    Generator: every yielded value is the accumulated progress log joined by
    newlines. When no extracted features are found, a single hint is yielded
    and the generator ends.

    Args:
        exp_dir1: Experiment name; data is read from and written to ``logs/<exp_dir1>``.
        version19: "v1" selects the 256-dimensional features, anything else 768.
    """
    exp_dir = "logs/%s" % exp_dir1
    os.makedirs(exp_dir, exist_ok=True)
    feature_dir = (
        "%s/3_feature256" % exp_dir
        if version19 == "v1"
        else "%s/3_feature768" % exp_dir
    )
    listdir_res = list(os.listdir(feature_dir)) if os.path.exists(feature_dir) else []
    if len(listdir_res) == 0:
        # This is a generator: a plain `return "text"` would never reach the
        # consumer, so the hint has to be yielded.
        yield "Please perform Feature Extraction First!"
        return

    infos = []
    npys = [np.load("%s/%s" % (feature_dir, name)) for name in sorted(listdir_res)]
    big_npy = np.concatenate(npys, 0)
    big_npy_idx = np.arange(big_npy.shape[0])
    np.random.shuffle(big_npy_idx)
    big_npy = big_npy[big_npy_idx]

    if big_npy.shape[0] > 2e5:
        # Large feature sets are reduced to 10k k-means centres first.
        infos.append("Trying doing kmeans %s shape to 10k centers." % big_npy.shape[0])
        yield "\n".join(infos)
        try:
            big_npy = (
                MiniBatchKMeans(
                    n_clusters=10000,
                    verbose=True,
                    batch_size=256 * config.n_cpu,
                    compute_labels=False,
                    init="random",
                )
                .fit(big_npy)
                .cluster_centers_
            )
        except Exception:
            # Best effort: report the failure and continue with the full set.
            # (A bare `except:` would also swallow KeyboardInterrupt.)
            info = traceback.format_exc()
            logger.info(info)
            infos.append(info)
            yield "\n".join(infos)

    np.save("%s/total_fea.npy" % exp_dir, big_npy)
    n_ivf = min(int(16 * np.sqrt(big_npy.shape[0])), big_npy.shape[0] // 39)
    infos.append("%s,%s" % (big_npy.shape, n_ivf))
    yield "\n".join(infos)

    index = faiss.index_factory(256 if version19 == "v1" else 768, "IVF%s,Flat" % n_ivf)
    infos.append("training")
    yield "\n".join(infos)
    index_ivf = faiss.extract_index_ivf(index)
    index_ivf.nprobe = 1
    index.train(big_npy)
    faiss.write_index(
        index,
        "%s/trained_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )

    infos.append("adding")
    yield "\n".join(infos)
    # Add the vectors in fixed-size batches.
    batch_size_add = 8192
    for i in range(0, big_npy.shape[0], batch_size_add):
        index.add(big_npy[i: i + batch_size_add])
    faiss.write_index(
        index,
        "%s/added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (exp_dir, n_ivf, index_ivf.nprobe, exp_dir1, version19),
    )
    infos.append(
        "Success,added_IVF%s_Flat_nprobe_%s_%s_%s.index"
        % (n_ivf, index_ivf.nprobe, exp_dir1, version19)
    )
    yield "\n".join(infos)
|
|
# The rmvpe GPU settings may only be shown when config.dml is exactly False.
F0GPUVisible = config.dml is False


def change_f0_method(f0method8):
    """Return a visibility update for the rmvpe GPU field.

    The field is visible only for the "rmvpe_gpu" method, and then only when
    `F0GPUVisible` allows it.
    """
    visible = F0GPUVisible if f0method8 == "rmvpe_gpu" else False
    return {"visible": visible, "__type__": "update"}
|
|
# Output widgets are created before the layout is built so that code defined
# earlier can refer to them; they are placed in the page later via .render().
vc_output1 = gr.Textbox(
    label=i18n("Console"),
)
vc_output2 = gr.Audio(
    label=i18n("Audio output"),
)
|
|
| with gr.Blocks(title="Simple Ilaria RVC 💖") as app: |
| gr.Markdown("<h1> Simple Ilaria RVC 💖 </h1>") |
| gr.Markdown(value=i18n("Made with 💖 by Ilaria | Support her on [Ko-Fi](https://ko-fi.com/ilariaowo)")) |
| gr.Markdown(i18n("For voice models and support join [AI Hub](https://discord.gg/AIHUB)")) |
| with gr.Tabs(): |
| with gr.TabItem(i18n("Inference")): |
| with gr.Row(): |
| sid0= gr.Dropdown(label=i18n("Voice"), choices=sorted(names)) |
| sid1= sid0 |
| refresh_button = gr.Button(i18n("Refresh"), variant="primary") |
| clean_button = gr.Button(i18n("Unload Voice from VRAM"), variant="primary") |
| with gr.Row(): |
| spk_item = gr.Slider( |
| minimum=0, |
| maximum=2333, |
| step=1, |
| label=i18n("Speaker ID (Auto-Detected)"), |
| value=0, |
| visible=True, |
| interactive=False, |
| ) |
| vc_transform0 = gr.Slider( |
| label=i18n( |
| "Pitch: -24 is lower (2 octaves) and 24 is higher (2 octaves)"), |
| minimum=-24, |
| maximum=24, |
| default=0, |
| step=1, |
| ) |
| clean_button.click( |
| fn=clean, inputs=[], outputs=[sid0], api_name="infer_clean" |
| ) |
| with gr.Row(): |
| but0 = gr.Button(i18n("Convert"), variant="primary") |
| with gr.TabItem(i18n("Inference")): |
| with gr.Group(): |
| with gr.Row(): |
| with gr.Column(): |
| input_audio1 = gr.Audio( |
| label=i18n("Upload Audio file"), |
| type="filepath", |
| ) |
| record_button = gr.Audio(source="microphone", label="Use your microphone", |
| type="filepath") |
| |
| input_audio0 = gr.Dropdown( |
| label=i18n("Select a file from the audio folder"), |
| choices=sorted(audio_paths), |
| value='', |
| interactive=True, |
| ) |
| record_button.change( |
| fn=lambda x: x, |
| inputs=[record_button], |
| outputs=[input_audio0], |
| ) |
| file_index1 = gr.Textbox( |
| label=i18n("Path of index"), |
| placeholder=".\models\index", |
| interactive=True, |
| visible=False, |
| ) |
| file_index2 = gr.Textbox( |
| label=i18n("Auto-detect index path"), |
| choices=sorted(index_paths), |
| interactive=True, |
| visible=False, |
| ) |
| with gr.Column(): |
| with gr.Accordion('Advanced Settings', open=False, visible=False): |
| with gr.Column(): |
| f0method0 = gr.Radio( |
| label=i18n("Pitch Extraction, rmvpe is best"), |
| choices=["harvest", "crepe", "rmvpe"] |
| if config.dml is False |
| else ["harvest", "rmvpe"], |
| value="rmvpe", |
| interactive=True, |
| ) |
| with gr.Row(): |
| resample_sr0 = gr.Slider( |
| minimum=0, |
| maximum=48000, |
| label=i18n("Resampling, 0=none"), |
| value=0, |
| step=1, |
| interactive=True, |
| ) |
| with gr.Row(): |
| rms_mix_rate0 = gr.Slider( |
| minimum=0, |
| maximum=1, |
| label=i18n("0=Input source volume, 1=Normalized Output"), |
| value=0.25, |
| interactive=True, |
| ) |
| protect0 = gr.Slider( |
| minimum=0, |
| maximum=0.5, |
| label=i18n( |
| "Protect clear consonants and breathing sounds, preventing electro-acoustic tearing and other artifacts, 0.5 does not open"), |
| value=0.33, |
| step=0.01, |
| interactive=True, |
| ) |
| filter_radius0 = gr.Slider( |
| minimum=0, |
| maximum=7, |
| label=i18n(">=3 apply median filter to the harvested pitch results"), |
| value=3, |
| step=1, |
| interactive=True, |
| ) |
| with gr.Row(): |
| index_rate1 = gr.Slider( |
| minimum=0, |
| maximum=1, |
| label=i18n("Index Ratio"), |
| value=0.40, |
| interactive=True, |
| ) |
| f0_file = gr.File( |
| label=i18n("F0 curve file [optional]"), |
| visible=False, |
| ) |
|
|
| refresh_button.click( |
| fn=change_choices, |
| inputs=[], |
| outputs=[sid0, file_index2, input_audio1], |
| api_name="infer_refresh", |
| ) |
| file_index1 = gr.Textbox( |
| label=i18n("Path of index"), |
| placeholder="%userprofile%\\Desktop\\models\\model_example.index", |
| interactive=True, |
| ) |
| file_index2 = gr.Dropdown( |
| label=i18n("Auto-detect index path"), |
| choices=sorted(index_paths), |
| interactive=True, |
| ) |
| |
|
|
| with gr.Accordion('IlariaTTS', open=True): |
| with gr.Column(): |
| ilariaid=gr.Dropdown(label="Voice:", choices=ilariavoices, interactive=True, value="English-Jenny (Female)") |
| with gr.Row(): |
| ilariatext = gr.Textbox(label="Input your Text", interactive=True, value="This is a test.") |
| ilariatts_button = gr.Button(value="Speak and Convert") |
| ilariatts_button.click(tts_and_convert, |
| [ilariaid, |
| ilariatext, |
| spk_item, |
| vc_transform0, |
| f0_file, |
| f0method0, |
| file_index1, |
| file_index2, |
| index_rate1, |
| filter_radius0, |
| resample_sr0, |
| rms_mix_rate0, |
| protect0] |
| , [vc_output1, vc_output2]) |
| |
| |
# Collapsed panel holding the fine-tuning controls for single-file conversion.
with gr.Accordion('Advanced Settings', open=False, visible=True):
    with gr.Column():
        # "crepe" is only offered when config.dml is False.
        f0method0 = gr.Radio(
            label=i18n("Pitch Extraction, rmvpe is best"),
            choices=(
                ["harvest", "crepe", "rmvpe"]
                if config.dml is False
                else ["harvest", "rmvpe"]
            ),
            value="rmvpe",
            interactive=True,
        )
        # Output sample rate; per the label, 0 means no resampling.
        resample_sr0 = gr.Slider(
            label=i18n("Resampling, 0=none"),
            minimum=0,
            maximum=48000,
            value=0,
            step=1,
            interactive=True,
        )
        rms_mix_rate0 = gr.Slider(
            label=i18n("0=Input source volume, 1=Normalized Output"),
            minimum=0,
            maximum=1,
            value=0.25,
            interactive=True,
        )
        protect0 = gr.Slider(
            label=i18n(
                "Protect clear consonants and breathing sounds, preventing electro-acoustic tearing and other artifacts, 0.5 does not open"),
            minimum=0,
            maximum=0.5,
            value=0.33,
            step=0.01,
            interactive=True,
        )
        filter_radius0 = gr.Slider(
            label=i18n(">=3 apply median filter to the harvested pitch results"),
            minimum=0,
            maximum=7,
            value=3,
            step=1,
            interactive=True,
        )
        index_rate1 = gr.Slider(
            label=i18n("Index Ratio"),
            minimum=0,
            maximum=1,
            value=0.40,
            interactive=True,
        )
        # Created hidden (visible=False); still usable as a handler input.
        f0_file = gr.File(
            label=i18n("F0 curve file [optional]"),
            visible=False,
        )
|
|
# NOTE(review): this repeats the refresh wiring and the two index-path widgets
# that appear a few dozen lines earlier in this file. The earlier registration
# lists three outputs, this one two -- check both against what change_choices
# returns. The click below is registered before file_index2 is rebound, so it
# targets the previously created dropdown, not the one created here.
refresh_button.click(
    change_choices,
    inputs=[],
    outputs=[sid0, file_index2],
    api_name="infer_refresh",
)
# From here on, file_index1 / file_index2 refer to these new instances.
file_index1 = gr.Textbox(
    interactive=True,
    label=i18n("Path of index"),
    placeholder="%userprofile%\\Desktop\\models\\model_example.index",
)
file_index2 = gr.Dropdown(
    interactive=True,
    label=i18n("Auto-detect index path"),
    choices=sorted(index_paths),
)
|
|
# Place the two previously constructed result widgets, each inside its own
# column of one shared group (vc_output1 first, then vc_output2).
with gr.Group():
    for pending_output in (vc_output1, vc_output2):
        with gr.Column():
            pending_output.render()
|
|
# Single-file conversion: pass the speaker, both audio sources and every
# conversion setting to vc.vc_single, in the positional order listed here.
single_convert_inputs = [
    spk_item,
    input_audio0,
    input_audio1,
    vc_transform0,
    f0_file,
    f0method0,
    file_index1,
    file_index2,
    index_rate1,
    filter_radius0,
    resample_sr0,
    rms_mix_rate0,
    protect0,
]
but0.click(
    fn=vc.vc_single,
    inputs=single_convert_inputs,
    outputs=[vc_output1, vc_output2],
    api_name="infer_convert",
)
# Tab for fetching a voice model: a link plus a model name are passed to
# download_from_url, whose return value is shown in the status box.
with gr.TabItem("Download Voice Models"):
    gr.Markdown(i18n("# For models found in [AI Hub](https://discord.gg/AIHUB)"))
    with gr.Row():
        url = gr.Textbox(label="Huggingface Link:")
        model = gr.Textbox(label="Name of the model (without spaces):")
        download_button = gr.Button("Download")
    with gr.Row():
        status_bar = gr.Textbox(label="Download Status")
        download_button.click(
            fn=download_from_url,
            inputs=[url, model],
            outputs=[status_bar],
        )
|
|
# Tab for importing a model archive: the uploaded file is passed to
# import_button_click and its return value is shown in the status box.
with gr.TabItem("Import Models"):
    gr.Markdown(i18n("For models found on [Weights](https://weights.gg)"))
    file_upload = gr.File(
        label="Upload a .zip file containing a .pth and .index file"
    )
    import_button = gr.Button("Import")
    import_status = gr.Textbox(label="Import Status")
    import_button.click(
        fn=import_button_click,
        inputs=file_upload,
        outputs=import_status,
    )
|
|
# Batch conversion tab: settings in two columns, then the input sources, then
# the Convert button wired to vc.vc_multi.
with gr.TabItem(i18n("Batch Inference")):
    gr.Markdown(value=i18n("Batch Conversion"))

    with gr.Row():
        # Left column: pitch shift, output folder, index selection, pitch
        # extraction method and export format.
        with gr.Column():
            vc_transform1 = gr.Number(
                label=i18n("Pitch: 0 from man to man (or woman to woman); 12 from man to woman and -12 from woman to man."),
                value=0,
            )
            opt_input = gr.Textbox(label=i18n("Output"), value="InferOutput")
            file_index3 = gr.Textbox(
                label=i18n("Path to index"),
                value="",
                interactive=True,
            )
            file_index4 = gr.Dropdown(
                label=i18n("Auto-detect index path"),
                choices=sorted(index_paths),
                interactive=True,
            )
            # "crepe" is only offered when config.dml is False.
            f0method1 = gr.Radio(
                label=i18n("Pitch Extraction, rmvpe is best"),
                choices=(
                    ["harvest", "crepe", "rmvpe"]
                    if config.dml is False
                    else ["harvest", "rmvpe"]
                ),
                value="rmvpe",
                interactive=True,
            )
            format1 = gr.Radio(
                label=i18n("Export Format"),
                choices=["flac", "wav", "mp3", "m4a"],
                value="flac",
                interactive=True,
            )

            # The refresh button also updates this tab's index dropdown, using
            # only element [1] of what change_choices() returns.
            refresh_button.click(
                fn=lambda: change_choices()[1],
                inputs=[],
                outputs=file_index4,
                api_name="infer_refresh_batch",
            )

        # Right column: the same fine-tuning sliders as the single-file tab,
        # bound to their own set of variables.
        with gr.Column():
            resample_sr1 = gr.Slider(
                label=i18n("Resampling, 0=none"),
                minimum=0,
                maximum=48000,
                value=0,
                step=1,
                interactive=True,
            )
            rms_mix_rate1 = gr.Slider(
                label=i18n("0=Input source volume, 1=Normalized Output"),
                minimum=0,
                maximum=1,
                value=0.25,
                interactive=True,
            )
            protect1 = gr.Slider(
                label=i18n(
                    "Protect clear consonants and breathing sounds, preventing electro-acoustic tearing and other artifacts, 0.5 does not open"),
                minimum=0,
                maximum=0.5,
                value=0.33,
                step=0.01,
                interactive=True,
            )
            filter_radius1 = gr.Slider(
                label=i18n(">=3 apply median filter to the harvested pitch results"),
                minimum=0,
                maximum=7,
                value=3,
                step=1,
                interactive=True,
            )
            index_rate2 = gr.Slider(
                label=i18n("Index Ratio"),
                minimum=0,
                maximum=1,
                value=0.40,
                interactive=True,
            )

    # Two ways to supply audio: a folder path or directly uploaded files.
    with gr.Row():
        dir_input = gr.Textbox(
            label=i18n("Enter the path to the audio folder to be processed"),
            placeholder="%userprofile%\\Desktop\\covers",
        )
        inputs = gr.File(
            file_count="multiple",
            label=i18n("Audio files can also be imported in batch"),
        )

    with gr.Row():
        but1 = gr.Button(i18n("Convert"), variant="primary")
        vc_output3 = gr.Textbox(label=i18n("Console"))

        batch_convert_inputs = [
            spk_item,
            dir_input,
            opt_input,
            inputs,
            vc_transform1,
            f0method1,
            file_index3,
            file_index4,
            index_rate2,
            filter_radius1,
            resample_sr1,
            rms_mix_rate1,
            protect1,
            format1,
        ]
        but1.click(
            fn=vc.vc_multi,
            inputs=batch_convert_inputs,
            outputs=[vc_output3],
            api_name="infer_convert_batch",
        )
# Training tab: experiment settings, dataset location/upload, schedule options,
# advanced GPU / pretrain settings, and the four pipeline buttons that all
# report into one shared output box.
with gr.TabItem(i18n("Train")):
    gr.Markdown(value=i18n(""))
    with gr.Row():
        exp_dir1 = gr.Textbox(label=i18n("Model Name"), value="test-model")
        sr2 = gr.Dropdown(
            label=i18n("Sample Rate & Pretrain"),
            choices=["32k", "40k", "48k", "OV2-32k", "OV2-40k", "RIN-40k", "Snowie-40k", "Snowie-48k", "SnowieV3.1-40k","SnowieV3.1-32k","SnowieV3.1-48k","SnowieV3.1-RinE3-40K","Italia-32k"],
            value="32k",
            interactive=True,
        )
        # Hidden, non-interactive selector: "v2" is the only choice. It is kept
        # as a component because the handlers below take it as an input.
        version19 = gr.Radio(
            label=i18n("Version 2 only here"),
            choices=["v2"],
            value="v2",
            interactive=False,
            visible=False,
        )
        # Defaults to ceil(n_cpu / 2.5) worker threads, capped at config.n_cpu.
        np7 = gr.Slider(
            minimum=0,
            maximum=config.n_cpu,
            step=1,
            label=i18n("CPU Threads"),
            value=int(np.ceil(config.n_cpu / 2.5)),
            interactive=True,
        )

    with gr.Group():
        gr.Markdown(value=i18n(""))
        with gr.Row():
            trainset_dir4 = gr.Textbox(
                label=i18n("Path to Dataset"), value="dataset"
            )
        with gr.Row():
            # Alternative to typing a path: upload files and hand them to
            # transfer_files, whose return value is shown in the status box.
            with gr.Accordion('Upload Dataset (alternative)', open=False, visible=True):
                file_thin = gr.Files(label='Dataset')
                show = gr.Textbox(label='Status')
                transfer_button = gr.Button('Upload Dataset to the folder', variant="primary")
                transfer_button.click(
                    fn=transfer_files,
                    inputs=[file_thin],
                    outputs=show,
                )

    with gr.Group():
        gr.Markdown(value=i18n(""))
        with gr.Row():
            save_epoch10 = gr.Slider(
                minimum=1,
                maximum=250,
                step=1,
                label=i18n("Save frequency"),
                value=50,
                interactive=True,
            )
            total_epoch11 = gr.Slider(
                minimum=2,
                maximum=10000,
                step=1,
                label=i18n("Total Epochs"),
                value=300,
                interactive=True,
            )
            batch_size12 = gr.Slider(
                minimum=1,
                maximum=16,
                step=1,
                label=i18n("Batch Size"),
                value=default_batch_size,
                interactive=True,
            )
            if_save_every_weights18 = gr.Radio(
                label=i18n("Create model with save frequency"),
                choices=[i18n("是"), i18n("否")],
                value=i18n("是"),
                interactive=True,
            )

    with gr.Accordion('Advanced Settings', open=False, visible=True):
        with gr.Row():
            with gr.Group():
                spk_id5 = gr.Slider(
                    minimum=0,
                    maximum=4,
                    step=1,
                    label=i18n("Speaker ID"),
                    value=0,
                    interactive=True,
                )
                if_f0_3 = gr.Radio(
                    label=i18n("Pitch Guidance"),
                    choices=[True, False],
                    value=True,
                    interactive=True,
                )
                # gpus6 feeds feature extraction; gpus16 (below) feeds training.
                gpus6 = gr.Textbox(
                    label=i18n("GPU ID (Leave 0 if you have only one GPU, use 0-1 for multiple GPus)"),
                    value=gpus,
                    interactive=True,
                    visible=F0GPUVisible,
                )
                gpu_info9 = gr.Textbox(
                    label=i18n("GPU Model"),
                    value=gpu_info,
                    visible=F0GPUVisible,
                )
                # Falls back to "0" when no GPU string was detected.
                gpus16 = gr.Textbox(
                    label=i18n("Enter cards to be used (Leave 0 if you have only one GPU, use 0-1 for multiple GPus)"),
                    value=gpus if gpus != "" else "0",
                    interactive=True,
                )
            with gr.Group():
                if_save_latest13 = gr.Radio(
                    label=i18n("Save last ckpt as final Model"),
                    choices=[i18n("是"), i18n("否")],
                    value=i18n("是"),
                    interactive=True,
                )
                if_cache_gpu17 = gr.Radio(
                    label=i18n("Cache data to GPU (Only for datasets under 8 minutes)"),
                    choices=[i18n("是"), i18n("否")],
                    value=i18n("否"),
                    interactive=True,
                )
                f0method8 = gr.Radio(
                    label=i18n("Feature Extraction Method"),
                    choices=["rmvpe", "rmvpe_gpu"],
                    value="rmvpe_gpu",
                    interactive=True,
                )
                gpus_rmvpe = gr.Textbox(
                    label=i18n(
                        "rmvpe_gpu will use your GPU instead of the CPU for the feature extraction"
                    ),
                    value="%s-%s" % (gpus, gpus),
                    interactive=True,
                    visible=F0GPUVisible,
                )
                # change_f0_method updates the rmvpe GPU box when the
                # extraction method changes.
                f0method8.change(
                    fn=change_f0_method,
                    inputs=[f0method8],
                    outputs=[gpus_rmvpe],
                )

        with gr.Row():
            # Hidden holders for the pretrained generator / discriminator
            # paths. gr.Textbox has no `choices` option, so only the default
            # value is supplied; the change handlers below keep both in sync
            # with the selected sample rate, version and pitch guidance.
            pretrained_G14 = gr.Textbox(
                label="Pretrained G",
                value=pretrained_G_files.get('f0G32.pth', ''),
                visible=False,
                interactive=True,
            )
            pretrained_D15 = gr.Textbox(
                label="Pretrained D",
                value=pretrained_D_files.get('f0D32.pth', ''),
                visible=False,
                interactive=True,
            )
            sr2.change(
                change_sr2,
                [sr2, if_f0_3, version19],
                [pretrained_G14, pretrained_D15],
            )
            version19.change(
                change_version19,
                [sr2, if_f0_3, version19],
                [pretrained_G14, pretrained_D15, sr2],
            )
            if_f0_3.change(
                change_f0,
                [if_f0_3, sr2, version19],
                [f0method8, gpus_rmvpe, pretrained_G14, pretrained_D15],
            )

    with gr.Group():
        with gr.Row():
            # NOTE: `but1` is rebound here; the batch tab's Convert button used
            # the same name, but its click handler was registered before this.
            # Variable numbers do not follow the displayed step order: but4 is
            # step 3 (index) and but3 is step 4 (model).
            but1 = gr.Button(i18n("1. Process Data"), variant="primary")
            but2 = gr.Button(i18n("2. Feature Extraction"), variant="primary")
            but4 = gr.Button(i18n("3. Train Index"), variant="primary")
            but3 = gr.Button(i18n("4. Train Model"), variant="primary")
        with gr.Row():
            info = gr.Textbox(label=i18n("Output"), value="", max_lines=5, lines=5)
            but1.click(
                preprocess_dataset,
                [trainset_dir4, exp_dir1, sr2, np7],
                [info],
                api_name="train_preprocess",
            )
            but2.click(
                extract_f0_feature,
                [
                    gpus6,
                    np7,
                    f0method8,
                    if_f0_3,
                    exp_dir1,
                    version19,
                    gpus_rmvpe,
                ],
                [info],
                api_name="train_extract_f0_feature",
            )
            # Registered exactly once: every registration adds another handler
            # run per click, so a second one would start index training twice.
            but4.click(train_index, [exp_dir1, version19], info)
            but3.click(
                click_train,
                [
                    exp_dir1,
                    sr2,
                    if_f0_3,
                    spk_id5,
                    save_epoch10,
                    total_epoch11,
                    batch_size12,
                    if_save_latest13,
                    pretrained_G14,
                    pretrained_D15,
                    gpus16,
                    if_cache_gpu17,
                    if_save_every_weights18,
                    version19,
                ],
                info,
                api_name="train_start",
            )
| |
# Source-separation tab: a description of the available models, controls to
# load / unload a model through one UVRHANDLER instance, and an Extract button
# that fills the instrumental and vocal players.
with gr.TabItem(i18n("UVR5")):
    with gr.Group():
        gr.Markdown(
            value=i18n(
                """
- **Kim Vocal 2**: Effortlessly separates vocals and instrumentals, a perfect tool for music enthusiasts.
- **Karaoke 5 HP**: Expertly isolates two overlapping voices, making it a valuable asset for duet performances.
- **DeEcho DeReverb**: Skillfully eliminates reverb from vocal tracks, enhancing the clarity of your sound.
- **MDX23C InstVoc**: Excellent at removing sound effects or other annoying noises, ensuring a smooth listening experience.
- **DeNoise**: Exceptional at detecting and removing nearly imperceptible noises that can compromise the quality of a cover or a model.
"""
            )
        )

    with gr.Group():
        # One handler object backs all three buttons in this tab.
        uvr_handler = UVRHANDLER()
        audios = gr.File()
        with gr.Row():
            output_dir = gr.Textbox('opt/', label='Output Directory')
        with gr.Row():
            model_name = gr.Dropdown(choices=uvr5_names, label='Models')
            model_status = gr.Textbox(
                placeholder='Waiting...',
                interactive=False,
                label='Model Information',
            )

        with gr.Row():
            LOADMODELBUTTON = gr.Button('Load Model', variant="primary")
            LOADMODELBUTTON.click(
                fn=uvr_handler.loadmodel,
                inputs=[model_name, output_dir],
                outputs=[model_status],
            )
            CLEARMODELBUTTON = gr.Button('Unload Model', variant="primary")
            CLEARMODELBUTTON.click(
                fn=uvr_handler.deloadmodel,
                outputs=[model_status],
            )

    with gr.Group():
        with gr.Column():
            with gr.Row():
                inst = gr.Audio(
                    show_download_button=True,
                    interactive=False,
                    label='Instrumental',
                )
                vocal = gr.Audio(
                    show_download_button=True,
                    interactive=False,
                    label='Vocals',
                )
            UVRBUTTON = gr.Button('Extract', variant="primary")
            UVRBUTTON.click(
                fn=uvr_handler.uvr,
                inputs=[audios],
                outputs=[inst, vocal],
            )
|
|
# "Extra" tab: a set of collapsed utility panels (model info, audio analysis,
# training helpers, model fusion) followed by the credits.
with gr.TabItem(i18n("Extra")):
    with gr.Accordion('Model Info', open=False):
        with gr.Column():
            sid1 = gr.Dropdown(label=i18n("Voice Model"), choices=sorted(names))
            # Rebinds `refresh_button`; handlers attached to the previous
            # button object earlier in the file stay attached to that object.
            refresh_button = gr.Button(i18n("Refresh"), variant="primary")
            refresh_button.click(
                fn=change_choices,
                inputs=[],
                outputs=[sid1, file_index2],
                api_name="infer_refresh",
            )
            modelload_out = gr.Textbox(
                label="Model Metadata",
                interactive=False,
                lines=4,
            )
            get_model_info_button = gr.Button(i18n("Get Model Info"))
            # Same callable and input/output layout as the sid0.change wiring
            # further down, but driven by this panel's dropdown.
            get_model_info_button.click(
                fn=vc.get_vc,
                inputs=[sid1, protect0, protect1],
                outputs=[
                    spk_item,
                    protect0,
                    protect1,
                    file_index2,
                    file_index4,
                    modelload_out,
                ],
            )

    with gr.Accordion('Audio Analyser', open=False):
        with gr.Column():
            audio_input = gr.Audio(type="filepath")
            get_info_button = gr.Button(
                value=i18n("Get information about the audio"),
                variant="primary",
            )

        with gr.Column():
            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        value=i18n("Information about the audio file"),
                        visible=True,
                    )
                    output_markdown = gr.Markdown(
                        value=i18n("Waiting for information..."),
                        visible=True,
                    )
                image_output = gr.Image(type="filepath", interactive=False)

        get_info_button.click(
            fn=generate_spectrogram_and_get_info,
            inputs=[audio_input],
            outputs=[output_markdown, image_output],
        )

    with gr.Accordion('Training Helper', open=False):
        with gr.Column():
            # `audio_input` and `get_info_button` are rebound here; the Audio
            # Analyser handler above was already registered on the old objects.
            audio_input = gr.Audio(type="filepath", label="Upload your audio file")
            gr.Text("Please note that these results are approximate and intended to provide a general idea for beginners.", label='Notice:')
            training_info_output = gr.Markdown(label="Training Information:")
            get_info_button = gr.Button("Get Training Info")
            get_info_button.click(
                fn=on_button_click,
                inputs=[audio_input],
                outputs=[training_info_output],
            )

    with gr.Accordion('Training Time Calculator', open=False):
        with gr.Column():
            epochs_input = gr.Number(label="Number of Epochs")
            seconds_input = gr.Number(label="Seconds per Epoch")
            calculate_button = gr.Button("Calculate Time Remaining")
            remaining_time_output = gr.Textbox(
                label="Remaining Time",
                interactive=False,
            )

            calculate_button.click(
                fn=calculate_remaining_time,
                inputs=[epochs_input, seconds_input],
                outputs=[remaining_time_output],
            )

    with gr.Accordion(i18n("Model Fusion"), open=False):
        with gr.Group():
            gr.Markdown(value=i18n("Strongly suggested to use only very clean models."))
            with gr.Row():
                ckpt_a = gr.Textbox(
                    label=i18n("Path of the first .pth"),
                    value="",
                    interactive=True,
                )
                ckpt_b = gr.Textbox(
                    label=i18n("Path of the second .pth"),
                    value="",
                    interactive=True,
                )
                alpha_a = gr.Slider(
                    label=i18n("Weight of the first model over the second"),
                    minimum=0,
                    maximum=1,
                    value=0.5,
                    interactive=True,
                )
        with gr.Group():
            with gr.Row():
                sr_ = gr.Radio(
                    label=i18n("Sample rate of both models"),
                    choices=["32k","40k", "48k"],
                    value="32k",
                    interactive=True,
                )
                if_f0_ = gr.Radio(
                    label=i18n("Pitch Guidance"),
                    choices=[i18n("是"), i18n("否")],
                    value=i18n("是"),
                    interactive=True,
                )
                # Hidden field; still passed to `merge` (empty by default).
                info__ = gr.Textbox(
                    label=i18n("Add informations to the model"),
                    value="",
                    max_lines=8,
                    interactive=True,
                    visible=False,
                )
                name_to_save0 = gr.Textbox(
                    label=i18n("Final Model name"),
                    value="",
                    max_lines=1,
                    interactive=True,
                )
                version_2 = gr.Radio(
                    label=i18n("Versions of the models"),
                    choices=["v1", "v2"],
                    value="v2",
                    interactive=True,
                )
        with gr.Group():
            with gr.Row():
                but6 = gr.Button(i18n("Fuse the two models"), variant="primary")
                info4 = gr.Textbox(label=i18n("Output"), value="", max_lines=8)
            fusion_inputs = [
                ckpt_a,
                ckpt_b,
                alpha_a,
                sr_,
                if_f0_,
                info__,
                name_to_save0,
                version_2,
            ]
            but6.click(
                fn=merge,
                inputs=fusion_inputs,
                outputs=info4,
                api_name="ckpt_merge",
            )

    with gr.Accordion('Credits', open=False):
        gr.Markdown('''
## All the amazing people who worked on this!

### Developers

- **Ilaria**: Founder, Lead Developer
- **Yui**: Training feature
- **GDR-**: Inference feature
- **Poopmaster**: Model downloader, Model importer
- **kitlemonfoot**: Ilaria TTS implementation
- **eddycrack864**: UVR5 implementation
- **Mikus**: Ilaria Updater & Downloader
- **Diablo**: Pretrain Automation, UI features, Various fixes

### Beta Tester

- **Charlotte**: Beta Tester, Advisor
- **mrm0dz**: Beta Tester, Advisor
- **RME**: Beta Tester
- **Delik**: Beta Tester
- **inductivegrub**: Beta Tester
- **l3af**: Beta Tester, Helper

### Pretrains Makers

- **simplcup**: Ov2Super
- **mustar22**: RIN_E3 & Snowie

### Colab Port

- **Angetyde**
- **l3af**
- **Poopmaster**
- **Hina**

### HuggingFace Port

- **Nick088**

### Other

- **RVC Project**: Original Developers
- **yumereborn**: Ilaria RVC image

### **In loving memory of JLabDX** 🕊️
''')
| |
# Whenever the main voice dropdown changes, call vc.get_vc with the selection
# and both protect sliders; its return values update the speaker control, the
# protect sliders, both index dropdowns and the model metadata box.
voice_change_targets = [
    spk_item,
    protect0,
    protect1,
    file_index2,
    file_index4,
    modelload_out,
]
sid0.change(
    fn=vc.get_vc,
    inputs=[sid0, protect0, protect1],
    outputs=voice_change_targets,
    api_name="infer_change_voice",
)
# Final tab, created with an empty label; it only shows one remote image.
with gr.TabItem(i18n("")):
    gr.Markdown(
        '''
![ilaria](https://i.ytimg.com/vi/5PWqt2Wg-us/maxresdefault.jpg)
'''
    )
# Pick the launch options for the current environment, then start the app
# behind the same request queue in both cases.
if config.iscolab:
    launch_options = {"share": True}
else:
    launch_options = {
        "server_name": "0.0.0.0",
        "inbrowser": not config.noautoopen,
        "server_port": config.listen_port,
        "quiet": True,
    }
app.queue(concurrency_count=511, max_size=1022).launch(**launch_options)
|
|