| |
|
|
| |
| |
| |
| |
| defaults: |
| - audiogen/default |
| - /model: lm/audiogen_lm |
| - override /dset: audio/default |
| - _self_ |
|
|
| lm_model: transformer_lm_magnet |
| solver: audio_magnet |
|
|
| autocast: true |
| autocast_dtype: float16 |
|
|
| |
| |
| |
| |
| compression_model_checkpoint: //reference/bd44a852/checkpoint.th |
|
|
| channels: 1 |
| sample_rate: 16000 |
|
|
| deadlock: |
| use: true |
|
|
| dataset: |
| batch_size: 128 |
| num_workers: 10 |
| segment_duration: 10 |
| min_segment_ratio: 1.0 |
| sample_on_weight: false |
| sample_on_duration: false |
| external_metadata_source: null |
| |
| train: |
| batch_size: 256 |
| aug_p: 0.5 |
| mix_p: 0.5 |
| |
| |
| |
| mix_snr_low: -5 |
| mix_snr_high: 5 |
| mix_min_overlap: 0.5 |
|
|
| optim: |
| epochs: 100 |
| optimizer: adamw |
| lr: 5e-4 |
| ema: |
| use: true |
| updates: 10 |
| device: cuda |
|
|
| logging: |
| log_tensorboard: true |
|
|
| schedule: |
| lr_scheduler: inverse_sqrt |
| inverse_sqrt: |
| warmup: 3000 |
| warmup_init_lr: 0.0 |
|
|
| codebooks_pattern: |
| modeling: parallel |
| parallel: |
| empty_initial: -1 |
| |
| transformer_lm: |
| card: 2048 |
| causal: false |
| subcodes_context: 5 |
| compression_model_framerate: 50 |
| segment_duration: 0 |
| span_len: -1 |
|
|
| masking: |
| span_len: 3 |
|
|
| generate: |
| lm: |
| max_prompt_len: null |
| max_gen_len: null |
| remove_prompts: false |
| use_sampling: true |
| temp: 3.5 |
| top_k: 0 |
| top_p: 0.8 |
| max_cfg_coef: 20.0 |
| min_cfg_coef: 1.0 |
| decoding_steps: [20, 10, 10, 10] |
| anneal_temp: true |
| span_scoring: 'max' |
| span_arrangement: 'nonoverlap' |
| prompted_samples: false |
| samples: |
| prompted: false |
| unprompted: true |
|
|
|
|