| import argparse |
| import os |
| import warnings |
|
|
| import torchcrepe |
|
|
|
|
| |
| |
| |
|
|
|
|
def parse_args(argv=None):
    """Parse command-line arguments

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. When None (the default), argparse reads
            ``sys.argv[1:]``, which is the original behavior.

    Returns:
        The ``argparse.Namespace`` holding the parsed options. Options
        declared without a default (``hop_length``, ``batch_size``, ``gpu``
        and the optional output file lists) are None when omitted.
    """
    parser = argparse.ArgumentParser()

    # Required arguments
    parser.add_argument(
        '--audio_files',
        nargs='+',
        required=True,
        help='The audio files to process')
    parser.add_argument(
        '--output_files',
        nargs='+',
        required=True,
        help='The files to save pitch or embedding')
    parser.add_argument(
        '--hop_length',
        type=int,
        help='The hop length of the analysis window')

    # Optionally save harmonicity [DEPRECATED]
    parser.add_argument(
        '--output_harmonicity_files',
        nargs='+',
        help='The files to save harmonicity (deprecated; use ' +
             '--output_periodicity_files)')

    # Optionally save periodicity
    parser.add_argument(
        '--output_periodicity_files',
        nargs='+',
        help='The files to save periodicity')

    # Optionally create embedding instead of pitch contour
    parser.add_argument(
        '--embed',
        action='store_true',
        help='Performs embedding instead of pitch prediction')

    # Optional arguments
    parser.add_argument(
        '--fmin',
        default=50.,
        type=float,
        help='The minimum frequency allowed')
    parser.add_argument(
        '--fmax',
        default=torchcrepe.MAX_FMAX,
        type=float,
        help='The maximum frequency allowed')
    parser.add_argument(
        '--model',
        default='full',
        help='The model capacity. One of "tiny" or "full"')
    parser.add_argument(
        '--decoder',
        default='viterbi',
        help='The decoder to use. One of "argmax", "viterbi", or ' +
             '"weighted_argmax"')
    parser.add_argument(
        '--batch_size',
        type=int,
        help='The number of frames per batch')
    parser.add_argument(
        '--gpu',
        type=int,
        help='The index of the gpu to perform inference on. Uses the cpu ' +
             'if omitted')
    parser.add_argument(
        '--no_pad',
        action='store_true',
        # The flag is inverted by the caller (`not args.no_pad`), so passing
        # it turns padding off rather than on.
        help='Disables padding of the audio')

    return parser.parse_args(argv)
|
|
|
|
def make_parent_directory(file):
    """Ensure the directory that will contain file exists"""
    # Resolve to an absolute path first so a bare filename maps to the
    # current working directory instead of an empty directory name.
    absolute = os.path.abspath(file)
    directory, _ = os.path.split(absolute)
    os.makedirs(directory, exist_ok=True)
|
|
|
|
def main():
    """Run torchcrepe embedding or pitch prediction from the command line

    Parses the command-line arguments, creates the parent directory of every
    requested output file, and then calls either
    ``torchcrepe.embed_from_files_to_files`` (when ``--embed`` is given) or
    ``torchcrepe.predict_from_files_to_files``.

    Raises:
        ValueError: If pitch prediction is requested with a ``--decoder``
            other than "argmax", "weighted_argmax", or "viterbi".
    """
    # Parse command-line arguments
    args = parse_args()

    # Deprecate output_harmonicity_files
    if args.output_harmonicity_files is not None:
        message = (
            'The torchcrepe output_harmonicity_files argument is deprecated and '
            'will be removed in a future release. Please use '
            'output_periodicity_files. Rationale: if network confidence measured '
            'harmonic content, the value would be low for non-harmonic, periodic '
            'sounds (e.g., sine waves). But this is not observed.')
        warnings.warn(message, DeprecationWarning)
        # The deprecated option takes over the periodicity outputs
        args.output_periodicity_files = args.output_harmonicity_files

    # Get decoder. Only pitch prediction uses it, so embedding runs are not
    # rejected over a decoder name they never consume. Validating here, before
    # any directory is created, replaces the unbound-variable failure that an
    # unrecognized name used to trigger at the predict call.
    decoder = None
    if not args.embed:
        decoders = {
            'argmax': torchcrepe.decode.argmax,
            'weighted_argmax': torchcrepe.decode.weighted_argmax,
            'viterbi': torchcrepe.decode.viterbi}
        if args.decoder not in decoders:
            raise ValueError(
                f'Unknown decoder "{args.decoder}". Expected one of: ' +
                ', '.join(f'"{name}"' for name in decoders))
        decoder = decoders[args.decoder]

    # Ensure output directories exist
    for path in args.output_files:
        make_parent_directory(path)
    if args.output_periodicity_files is not None:
        for path in args.output_periodicity_files:
            make_parent_directory(path)

    # Get inference device
    device = 'cpu' if args.gpu is None else f'cuda:{args.gpu}'

    # Infer pitch or embedding and save to disk
    if args.embed:
        torchcrepe.embed_from_files_to_files(args.audio_files,
                                             args.output_files,
                                             args.hop_length,
                                             args.model,
                                             args.batch_size,
                                             device,
                                             not args.no_pad)
    else:
        # NOTE(review): the third positional argument is always None;
        # presumably the deprecated harmonicity output slot - confirm against
        # the torchcrepe.predict_from_files_to_files signature.
        torchcrepe.predict_from_files_to_files(args.audio_files,
                                               args.output_files,
                                               None,
                                               args.output_periodicity_files,
                                               args.hop_length,
                                               args.fmin,
                                               args.fmax,
                                               args.model,
                                               decoder,
                                               args.batch_size,
                                               device,
                                               not args.no_pad)
|
|
|
|
| |
# Run module entry point. This call is unconditional, so importing this
# module also runs main().
# NOTE(review): consider an `if __name__ == '__main__':` guard if nothing
# relies on the import-time side effect - confirm how this file is invoked.
main()
|
|