| import io |
| import logging |
| import time |
| from pathlib import Path |
|
|
| import librosa |
| import numpy as np |
| import soundfile |
|
|
| from inference import infer_tool |
| from inference import slicer |
| from inference.infer_tool import Svc |
|
|
# Raise numba's logger to WARNING so its lower-level messages are not printed.
logging.getLogger('numba').setLevel(logging.WARNING)
# NOTE(review): chunks_dict is not referenced again in the visible part of
# this script; kept because other code may rely on the module-level name.
chunks_dict = infer_tool.read_temp("inference/chunks_temp.json")


# Checkpoint and config handed to Svc; dev="cuda" is passed as the device.
model_path = "logs/32k/sing1.pth"
config_path = "configs/config.json"
svc_model = Svc(model_path, config_path, dev="cuda")
# presumably creates the input/output directories when missing — confirm in
# infer_tool.mkdir
infer_tool.mkdir(["raw", "results"])


# Run parameters.
# clean_names: input names looked up under raw/ ('.wav' is appended later
#              when the resulting path contains no '.').
# trans:       one value per input, passed to svc_model.infer together with
#              the speaker (presumably a pitch shift in semitones — confirm).
# spk_list:    speaker identifiers passed to svc_model.infer; every input is
#              converted once per speaker.
# slice_db:    forwarded to slicer.cut as db_thresh.
# wav_format:  output file extension and soundfile format.
clean_names = ["xzh3"]
trans = [2]
spk_list = ['yukie']
slice_db = -40
wav_format = 'flac'
|
|
# NOTE(review): fill_a_to_b presumably pads `trans` so it has one entry per
# name in `clean_names`; without that, zip() below stops at the shorter list.
# Confirm against infer_tool.
infer_tool.fill_a_to_b(trans, clean_names)
for clean_name, tran in zip(clean_names, trans):
    # Resolve the input path under raw/, defaulting to a .wav extension.
    raw_audio_path = f"raw/{clean_name}"
    if "." not in raw_audio_path:
        raw_audio_path += ".wav"
    infer_tool.format_wav(raw_audio_path)
    wav_path = Path(raw_audio_path).with_suffix('.wav')

    # Cut the input into segments; each entry of audio_data is a
    # (slice_tag, samples) pair, where a truthy slice_tag marks a segment
    # that is skipped and replaced by silence.
    chunks = slicer.cut(wav_path, db_thresh=slice_db)
    audio_data, audio_sr = slicer.chunks2audio(wav_path, chunks)

    for spk in spk_list:
        # Collect per-segment arrays and join them once at the end instead of
        # growing a Python list sample by sample.
        segments = []
        for slice_tag, data in audio_data:
            print(
                f'#=====segment start, {round(len(data) / audio_sr, 3)}s======')
            if slice_tag:
                print('jump empty segment')
                # Silence of the same duration, expressed at the model's
                # output sample rate.
                length = int(
                    np.ceil(len(data) / audio_sr * svc_model.target_sample))
                segments.append(np.zeros(length))
            else:
                # The model receives the segment as an in-memory WAV file;
                # it is only built for segments that are actually converted.
                raw_path = io.BytesIO()
                soundfile.write(raw_path, data, audio_sr, format="wav")
                raw_path.seek(0)
                out_audio, _ = svc_model.infer(spk, tran, raw_path)
                segments.append(out_audio.cpu().numpy())

        # np.concatenate rejects an empty sequence, so fall back to an empty
        # signal when the slicer produced no segments.
        audio = np.concatenate(segments) if segments else np.zeros(0)

        res_path = f'./results/{clean_name}_{tran}key_{spk}-6-1.{wav_format}'
        soundfile.write(res_path, audio,
                        svc_model.target_sample, format=wav_format)
|
|