Training

  • System: ASRSystem
  • Recipe: mini_an4/asr
  • Created: 2026-04-03T04:24:18.630202
  • Git: 9f2acd4d196a14cd7daec5b929e1420414d753fa (dirty)

Pack

  • Archive: model_pack
  • Strategy: espnet2
  • Exp dir: exp/train_asr_rnn_data_aug
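
If the espnet2 pack strategy produces a standard espnet2 model archive, the
pack can be loaded through the usual Speech2Text interface. A minimal sketch,
not taken from this recipe: the repo id and the wav path are placeholders.

import soundfile as sf
from espnet2.bin.asr_inference import Speech2Text

# "username/model-id" is a placeholder for wherever this pack is published.
speech2text = Speech2Text.from_pretrained("username/model-id")

speech, rate = sf.read("sample.wav")  # 16 kHz mono audio, matching fs below
text, tokens, token_ids, hyp = speech2text(speech)[0]  # best hypothesis
print(text)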

Train config

num_device: 1
num_nodes: 1
task: espnet3.systems.asr.task.ASRTask
recipe_dir: .
data_dir: ./data
exp_tag: train_asr_rnn_data_aug
exp_dir: ./exp/train_asr_rnn_data_aug
stats_dir: ./exp/stats
inference_dir: ./exp/train_asr_rnn_data_aug/inference
dataset_dir: /path/to/your/dataset
create_dataset:
  func: src.creating_dataset.create_dataset
  dataset_dir: /path/to/your/dataset
  recipe_dir: .
dataset:
  _target_: espnet3.components.data.data_organizer.DataOrganizer
  train:
  - ref: mini_an4/asr
    kwargs:
      split: train
  valid:
  - ref: mini_an4/asr
    kwargs:
      split: valid
  test: null
  preprocessor:
    _target_: espnet2.train.preprocessor.CommonPreprocessor
    fs: 16000
    train: true
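    # Each entry below follows the [weight, effect, kwargs] layout consumed by
    # espnet2's DataAugmentation; a nested list groups mutually exclusive
    # variants, one of which is drawn according to its weight.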
    data_aug_effects:
    - - 0.1
      - contrast
      - enhancement_amount: 75.0
    - - 0.1
      - highpass
      - cutoff_freq: 5000
        Q: 0.707
    - - 0.1
      - equalization
      - center_freq: 1000
        gain: 0
        Q: 0.707
    - - 0.1
      - - - 0.3
          - speed_perturb
          - factor: 0.9
        - - 0.3
          - speed_perturb
          - factor: 1.1
        - - 0.3
          - speed_perturb
          - factor: 1.3
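    # [min, max] number of effects sampled per utterance; with a prob of 1.0,
    # every training utterance passes through the augmentation chain.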
    data_aug_num:
    - 1
    - 4
    data_aug_prob: 1.0
    token_type: bpe
    token_list: ./data/bpe_30/tokens.txt
    bpemodel: ./data/bpe_30/bpe.model
    _convert_: all
  _convert_: all
tokenizer:
  vocab_size: 30
  character_coverage: 1.0
  model_type: bpe
  save_path: ./data/bpe_30
  text_builder:
    func: src.tokenizer.gather_training_text
    manifest_path: ./data/manifest/train.tsv
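# The 2-unit hidden/output sizes below are deliberate: mini_an4 is ESPnet's
# tiny smoke-test corpus, so this run validates the pipeline rather than
# training a usable model.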
model:
  vocab_size: 30
  token_list: ./data/bpe_30/tokens.txt
  encoder: vgg_rnn
  encoder_conf:
    num_layers: 1
    hidden_size: 2
    output_size: 2
  decoder: rnn
  decoder_conf:
    hidden_size: 2
  normalize: utterance_mvn
  normalize_conf: {}
  model_conf:
    ctc_weight: 0.3
    lsm_weight: 0.1
    length_normalized_loss: false
  frontend: default
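  # At fs 16000, win_length 400 and hop_length 160 give the standard
  # 25 ms window with a 10 ms shift.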
  frontend_conf:
    n_fft: 512
    win_length: 400
    hop_length: 160
optimizer:
  _target_: torch.optim.AdamW
  lr: 0.001
  weight_decay: 0.0
  _convert_: all
scheduler:
  _target_: espnet2.schedulers.warmup_lr.WarmupLR
  warmup_steps: 15000
  _convert_: all
scheduler_interval: step
scheduler_monitor: null
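# Keep the single best checkpoint, ranked by maximum validation accuracy.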
best_model_criterion:
- - valid/acc
  - 1
  - max
seed: null
init: null
parallel:
  env: local
  n_workers: 1
dataloader:
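  # CommonCollateFn zero-pads speech features and pads token id sequences with
  # -1, the ignore index skipped by the loss.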
  collate_fn:
    _target_: espnet2.train.collate_fn.CommonCollateFn
    int_pad_value: -1
    _convert_: all
  train:
    num_shards: 1
    iter_factory:
      _target_: espnet2.iterators.sequence_iter_factory.SequenceIterFactory
      shuffle: true
      collate_fn:
        _target_: espnet2.train.collate_fn.CommonCollateFn
        int_pad_value: -1
        _convert_: all
      batches:
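        # "sorted" batching orders utterances by the lengths recorded in
        # shape_files before grouping them into minibatches.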
        type: sorted
        shape_files:
        - ./exp/stats/train/feats_shape
        batch_size: 1
        batch_bins: 4000000
      _convert_: all
  valid:
    num_shards: 1
    iter_factory:
      _target_: espnet2.iterators.sequence_iter_factory.SequenceIterFactory
      shuffle: false
      collate_fn:
        _target_: espnet2.train.collate_fn.CommonCollateFn
        int_pad_value: -1
        _convert_: all
      batches:
        type: sorted
        shape_files:
        - ./exp/stats/valid/feats_shape
        batch_size: 1
        batch_bins: 4000000
      _convert_: all
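# max_epochs: 1 combined with limit_train_batches/limit_val_batches: 1 runs a
# single batch per phase, i.e. a pipeline smoke test, not a real training run.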
trainer:
  accelerator: auto
  devices: 1
  num_nodes: 1
  accumulate_grad_batches: 1
  check_val_every_n_epoch: 1
  gradient_clip_val: 1.0
  log_every_n_steps: 1
  max_epochs: 1
  logger:
  - _target_: lightning.pytorch.loggers.TensorBoardLogger
    save_dir: ./exp/train_asr_rnn_data_aug/tensorboard
    name: tb_logger
    _convert_: all
  strategy: auto
  limit_train_batches: 1
  limit_val_batches: 1
fit: {}
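# The override below swaps the WarmupLR schedule for ReduceLROnPlateau, stepped
# against the validation criterion named by val_scheduler_criterion.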
override scheduler:
  _target_: torch.optim.lr_scheduler.ReduceLROnPlateau
  mode: min
  factor: 0.5
  patience: 1
  _convert_: all
val_scheduler_criterion: valid/loss
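
The preprocessor block above can be exercised on its own. The sketch below is
illustrative rather than part of the recipe: it assumes the dump above is saved
as config.yaml, that espnet2 and torchaudio are installed, and that the BPE
artifacts under ./data/bpe_30 exist; the utterance id and the random waveform
are placeholders.

import numpy as np
from hydra.utils import instantiate
from omegaconf import OmegaConf

# Build the CommonPreprocessor exactly as configured above (data augmentation
# is active because train: true).
cfg = OmegaConf.load("config.yaml")
preprocessor = instantiate(cfg.dataset.preprocessor)

# CommonPreprocessor is called with (uid, data); one second of noise stands in
# for real 16 kHz speech, and the text is tokenized with the BPE model above.
data = {"speech": np.random.randn(16000).astype(np.float32), "text": "HELLO"}
out = preprocessor("utt1", data)
print(out["speech"].shape, out["text"])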

Citing ESPnet

@inproceedings{watanabe2018espnet,
  author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and
    Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner
    and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
  title={{ESPnet}: End-to-End Speech Processing Toolkit},
  year={2018},
  booktitle={Proceedings of Interspeech},
  pages={2207--2211},
  doi={10.21437/Interspeech.2018-1456}
}