| model: |
| _target_: src.model.ConformerModel |
| input_dim: 128 |
| writer: |
| _target_: src.logger.CometMLWriter |
| project_name: pytorch_template_asr_example |
| workspace: null |
| run_name: conformer_30m |
| mode: online |
| loss_names: |
| - loss |
| log_checkpoints: false |
| id_length: 32 |
| run_id: m2guzao93o9ytjxogwt78mftkyiqalsf |
| metrics: |
| train: [] |
| inference: |
| - _target_: src.metrics.ArgmaxCERMetric |
| name: CER_(Argmax) |
| - _target_: src.metrics.ArgmaxWERMetric |
| name: WER_(Argmax) |
| - _target_: src.metrics.WER |
| name: WER |
| - _target_: src.metrics.CER |
| name: CER |
| datasets: |
| train: |
| _target_: src.datasets.LibrispeechDataset |
| part: train-other-500 |
| instance_transforms: ${transforms.instance_transforms.train} |
| val: |
| _target_: src.datasets.LibrispeechDataset |
| part: test-clean |
| instance_transforms: ${transforms.instance_transforms.inference} |
| test: |
| _target_: src.datasets.LibrispeechDataset |
| part: test-other |
| instance_transforms: ${transforms.instance_transforms.inference} |
| dataloader: |
| _target_: torch.utils.data.DataLoader |
| batch_size: 30 |
| num_workers: 2 |
| pin_memory: true |
| transforms: |
| instance_transforms: |
| train: |
| get_spectrogram: |
| _target_: torchaudio.transforms.MelSpectrogram |
| sample_rate: 16000 |
| audio: |
| _target_: torchvision.transforms.v2.Compose |
| transforms: |
| - _target_: src.transforms.wav_augs.Gain |
| sample_rate: 16000 |
| min_gain_in_db: -6 |
| max_gain_in_db: 6 |
| p: 0.2 |
| - _target_: src.transforms.wav_augs.Shift |
| p: 0.2 |
| - _target_: src.transforms.wav_augs.PitchShift |
| min_semitones: -2 |
| max_semitones: 2 |
| p: 0.2 |
| - _target_: src.transforms.wav_augs.Noise |
| p: 0.3 |
| inference: |
| get_spectrogram: |
| _target_: torchaudio.transforms.MelSpectrogram |
| sample_rate: 16000 |
| batch_transforms: |
| train: null |
| inference: null |
| optimizer: |
| _target_: torch.optim.AdamW |
| lr: 5.0e-05 |
| lr_scheduler: |
| _target_: torch.optim.lr_scheduler.OneCycleLR |
| max_lr: 0.0001 |
| pct_start: 0.1 |
| steps_per_epoch: ${trainer.epoch_len} |
| epochs: ${trainer.n_epochs} |
| anneal_strategy: cos |
| loss_function: |
| _target_: src.loss.CTCLossWrapper |
| text_encoder: |
| _target_: src.text_encoder.CTCTextEncoder |
| trainer: |
| log_step: 200 |
| n_epochs: 150 |
| epoch_len: 1300 |
| device_tensors: |
| - spectrogram |
| - text_encoded |
| resume_from: checkpoint-epoch62.pth |
| device: auto |
| override: false |
| monitor: min val_WER_(Argmax) |
| save_period: 5 |
| early_stop: ${trainer.n_epochs} |
| save_dir: saved |
| seed: 1 |
|
|