| |
| |
| |
| |
|
|
| import os |
| import numpy as np |
| import torch |
| import torchaudio |
|
|
|
|
def save_feature(process_dir, feature_dir, item, feature, overrides=True):
    """Store one feature as ``<process_dir>/<feature_dir>/<item>.npy``.

    The target directory is created when it does not exist yet.

    Args:
        process_dir (str): root directory to store features
        feature_dir (str): sub-directory holding one type of features (mel, energy, ...)
        item (str): uid, used as the file name without extension
        feature: feature data handed to ``np.save``
        overrides (bool, optional): whether to override an existing file.
            Defaults to True.
    """
    target_dir = os.path.join(process_dir, feature_dir)
    os.makedirs(target_dir, exist_ok=True)
    npy_path = os.path.join(target_dir, item + ".npy")

    # An existing file is only replaced when overriding is requested.
    if os.path.exists(npy_path) and not overrides:
        return
    np.save(npy_path, feature)
|
|
|
|
def save_txt(process_dir, feature_dir, item, feature, overrides=True):
    """Write text to ``<process_dir>/<feature_dir>/<item>.txt``.

    The target directory is created when it does not exist yet.

    Args:
        process_dir (str): root directory to store features
        feature_dir (str): sub-directory holding one type of features
        item (str): uid, used as the file name without extension
        feature: iterable of strings handed to ``file.writelines``; no line
            separators are added, so each entry must carry its own newline
        overrides (bool, optional): whether to override an existing file.
            Defaults to True.
    """
    out_dir = os.path.join(process_dir, feature_dir)
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, item + ".txt")

    # An existing file is only replaced when overriding is requested.
    if os.path.exists(out_path) and not overrides:
        return

    # The context manager closes the handle even when writelines raises
    # (e.g. a non-string entry in `feature`).
    with open(out_path, "w") as f:
        f.writelines(feature)
|
|
|
|
def save_audio(path, waveform, fs, add_silence=False, turn_up=False, volume_peak=0.9):
    """Save a waveform as a mono 16-bit signed PCM file via ``torchaudio.save``.

    Args:
        path: target file path.
        waveform: audio samples; the silence padding relies on the numpy
            array interface (``shape``, ``dtype``). The last axis is treated
            as the sample axis.
        fs (int): sample rate.
        add_silence (bool, optional): if True, pad ``fs // 20`` zero samples
            (0.05 s) at the beginning and the end. Defaults to False.
        turn_up (bool, optional): if True, rescale so that the largest
            absolute sample equals ``volume_peak``. Defaults to False.
        volume_peak (float, optional): target peak used when ``turn_up`` is
            set. Defaults to 0.9.
    """
    if turn_up:
        peak = max(waveform.max(), abs(waveform.min()))
        # A silent clip has a peak of 0; dividing by it would turn every
        # sample into NaN, so such input is left unscaled.
        if peak > 0:
            waveform = waveform * (volume_peak / peak)

    if add_silence:
        silence_len = fs // 20
        # Match all leading (channel) dimensions so multi-channel input can
        # be padded too; for 1-D input this is the plain (silence_len,) shape.
        silence_shape = tuple(waveform.shape[:-1]) + (silence_len,)
        silence = np.zeros(silence_shape, dtype=waveform.dtype)
        waveform = np.concatenate([silence, waveform, silence], axis=-1)

    waveform = torch.as_tensor(waveform, dtype=torch.float32, device="cpu")
    if len(waveform.size()) == 1:
        # Add the leading channel dimension.
        waveform = waveform[None, :]
    elif waveform.size(0) != 1:
        # Down-mix to a single channel by averaging over the first dimension.
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    torchaudio.save(path, waveform, fs, encoding="PCM_S", bits_per_sample=16)
|
|
|
|
async def async_load_audio(path, sample_rate: int = 24000):
    r"""Load an audio file as a mono waveform at the requested sample rate.

    Note that ``torchaudio.load`` is called directly inside the coroutine.

    Args:
        path: The source loading path.
        sample_rate: The target sample rate, will automatically resample if necessary.

    Returns:
        waveform: The waveform object. Should be [1 x sequence_len].

    Raises:
        ValueError: If the loaded (and possibly resampled) waveform contains
            NaN or Inf values.
    """

    async def use_torchaudio_load(path):
        return torchaudio.load(path)

    waveform, sr = await use_torchaudio_load(path)
    # Average over the channel dimension to obtain a single channel.
    waveform = torch.mean(waveform, dim=0, keepdim=True)

    if sr != sample_rate:
        waveform = torchaudio.functional.resample(waveform, sr, sample_rate)

    # torch.isfinite is False for NaN and +/-Inf. Python's `or` cannot be
    # applied to multi-element tensors (it raises "Boolean value of Tensor
    # ... is ambiguous"), so the check is done element-wise instead.
    if not torch.isfinite(waveform).all():
        raise ValueError("NaN or Inf found in waveform.")
    return waveform
|
|
|
|
async def async_save_audio(
    path,
    waveform,
    sample_rate: int = 24000,
    add_silence: bool = False,
    volume_peak: float = 0.9,
):
    r"""Peak-normalize a waveform and save it as 16-bit signed PCM.

    Note that ``torchaudio.save`` is called directly inside the coroutine.

    Args:
        path: The target saving path.
        waveform: The waveform object. Should be [n_channel x sequence_len].
        sample_rate: Sample rate.
        add_silence: If ``true``, concat 0.05s silence to beginning and end.
        volume_peak: Turn up volume for larger number, vice versa.
    """

    async def use_torchaudio_save(path, waveform, sample_rate):
        torchaudio.save(
            path, waveform, sample_rate, encoding="PCM_S", bits_per_sample=16
        )

    waveform = torch.as_tensor(waveform, device="cpu", dtype=torch.float32)
    # Leading (channel) dimensions, reused to build matching silence.
    shape = waveform.size()[:-1]

    peak = max(waveform.max(), abs(waveform.min()))
    # A silent clip has a peak of 0; dividing by it would turn every sample
    # into NaN, so such input is left unscaled.
    if peak > 0:
        waveform = waveform * (abs(volume_peak) / peak)

    if add_silence:
        silence_len = sample_rate // 20
        # `dtype` must be a torch.dtype; Tensor.type() returns a string such
        # as 'torch.FloatTensor' and is rejected by torch.zeros.
        silence = torch.zeros((*shape, silence_len), dtype=waveform.dtype)
        # torch.cat is available in every torch release, unlike the newer
        # torch.concatenate alias.
        waveform = torch.cat((silence, waveform, silence), dim=-1)

    if waveform.dim() == 1:
        # Add the leading channel dimension.
        waveform = waveform[None]

    await use_torchaudio_save(path, waveform, sample_rate)
|
|
|
|
def load_mel_extrema(cfg, dataset_name, split):
    """Load the stored mel minimum and maximum arrays of a dataset split.

    The files are read from
    ``<cfg.OUTPUT_PATH>/preprocess/<cfg.data.process_version>_version/
    <dataset_name>/mel_min_max/<tag>/mel_{min,max}.npy``, where ``<tag>`` is
    the part of ``split`` after its last underscore.

    Args:
        cfg: config object providing ``OUTPUT_PATH`` and ``data.process_version``.
        dataset_name (str): name of the dataset directory.
        split (str): split identifier; only the text after the last ``_`` is used.

    Returns:
        tuple: ``(mel_min, mel_max)`` as returned by ``np.load``.
    """
    split_tag = split.split("_")[-1]
    extrema_dir = os.path.join(
        cfg.OUTPUT_PATH,
        "preprocess/{}_version".format(cfg.data.process_version),
        dataset_name,
        "mel_min_max",
        split_tag,
    )

    mel_min = np.load(os.path.join(extrema_dir, "mel_min.npy"))
    mel_max = np.load(os.path.join(extrema_dir, "mel_max.npy"))
    return mel_min, mel_max
|
|