# Train a Canary ASR model.
#
# Launches scripts/speech_to_text_aed.py with the config file
# /home/ubuntu/NeMo/config/fast-conformer_aed.yaml and overrides selected
# values on the command line: dataset manifests, tokenizer directories,
# encoder/decoder sizes, experiment-manager options and trainer limits.
#
# Notes:
#   - The script path is relative, so run this from the directory that
#     contains scripts/.
#   - exp_manager.exp_dir is also a relative path; presumably it is resolved
#     against the current working directory -- confirm before relying on it.
#   - HYDRA_FULL_ERROR=1 is set only in the environment of this one python
#     process, not in the calling shell.
#   - Every argument line except the last must end with a bare backslash.
#     Do not put comments, blank lines or trailing characters after the
#     backslash, or the command is cut short at that point.
HYDRA_FULL_ERROR=1 python scripts/speech_to_text_aed.py \
  --config-path="/home/ubuntu/NeMo/config" \
  --config-name="fast-conformer_aed.yaml" \
  name="canary-small" \
  model.prompt_format="canary2" \
  model.train_ds.manifest_filepath="/home/ubuntu/NeMo/data/tsukasa_train.json" \
  model.validation_ds.manifest_filepath="/home/ubuntu/NeMo/data/tsukasa_val.json" \
  model.test_ds.manifest_filepath="/home/ubuntu/NeMo/data/tsukasa_test.json" \
  model.tokenizer.langs.ja.dir="/home/ubuntu/NeMo/tokenizers/jp_TSUKA_1024/tokenizer_spe_bpe_v1024" \
  model.tokenizer.langs.spl_tokens.dir="/home/ubuntu/NeMo/tokenizers/spl_tokens" \
  spl_tokens.model_dir="/home/ubuntu/NeMo/tokenizers/spl_tokens" \
  model.encoder.n_layers=17 \
  model.transf_decoder.config_dict.num_layers=4 \
  model.transf_decoder.config_dict.max_sequence_length=512 \
  model.model_defaults.asr_enc_hidden=512 \
  model.model_defaults.lm_dec_hidden=1024 \
  exp_manager.exp_dir="canary_results" \
  exp_manager.resume_ignore_no_checkpoint=true \
  trainer.max_steps=200_000 \
  trainer.log_every_n_steps=50