NeMo_Canary / train_qanary.sh
Respair's picture
Upload folder using huggingface_hub
b386992 verified
#!/bin/bash
#
# Train a Canary ASR model: launches scripts/speech_to_text_aed.py with the
# fast-conformer_aed.yaml Hydra config plus the overrides listed below.
#
# Usage:
#   Run from the directory that contains scripts/speech_to_text_aed.py; that
#   path and exp_manager.exp_dir ("canary_results") are both relative to the
#   current working directory.
#
# Environment:
#   NEMO_ROOT  Base directory holding config/, data/ and tokenizers/.
#              Defaults to /home/ubuntu/NeMo, the path that used to be
#              hard-coded in every override.
#              NOTE(review): the value is spliced into Hydra overrides, so it
#              should avoid characters the Hydra override grammar treats
#              specially - confirm against the Hydra docs before using an
#              unusual path.

# Stop at the first failing command, unset variable, or broken pipeline.
set -euo pipefail

# Single source of truth for the base path, overridable from the environment.
nemo_root="${NEMO_ROOT:-/home/ubuntu/NeMo}"

# The special-token tokenizer directory is handed to the trainer twice: as the
# spl_tokens entry of model.tokenizer.langs and as spl_tokens.model_dir.
spl_tokens_dir="${nemo_root}/tokenizers/spl_tokens"

# One array element per argument, so each one reaches Python as exactly one
# argv entry without relying on backslash line continuations.
hydra_args=(
  # Config selection and experiment name.
  "--config-path=${nemo_root}/config"
  "--config-name=fast-conformer_aed.yaml"
  "name=canary-small"
  "model.prompt_format=canary2"

  # Dataset manifests (train / validation / test).
  "model.train_ds.manifest_filepath=${nemo_root}/data/tsukasa_train.json"
  "model.validation_ds.manifest_filepath=${nemo_root}/data/tsukasa_val.json"
  "model.test_ds.manifest_filepath=${nemo_root}/data/tsukasa_test.json"

  # Tokenizers: the "ja" language tokenizer plus the special-token tokenizer.
  "model.tokenizer.langs.ja.dir=${nemo_root}/tokenizers/jp_TSUKA_1024/tokenizer_spe_bpe_v1024"
  "model.tokenizer.langs.spl_tokens.dir=${spl_tokens_dir}"
  "spl_tokens.model_dir=${spl_tokens_dir}"

  # Encoder / decoder architecture overrides.
  "model.encoder.n_layers=17"
  "model.transf_decoder.config_dict.num_layers=4"
  "model.transf_decoder.config_dict.max_sequence_length=512"
  "model.model_defaults.asr_enc_hidden=512"
  "model.model_defaults.lm_dec_hidden=1024"

  # Experiment manager.
  # NOTE(review): resume_ignore_no_checkpoint presumably only matters when
  # exp_manager.resume_if_exists is enabled in the YAML config - verify there.
  "exp_manager.exp_dir=canary_results"
  "exp_manager.resume_ignore_no_checkpoint=true"

  # Trainer schedule and logging cadence.
  "trainer.max_steps=200_000"
  "trainer.log_every_n_steps=50"
)

# HYDRA_FULL_ERROR=1 is set for this one command only.
HYDRA_FULL_ERROR=1 python scripts/speech_to_text_aed.py "${hydra_args[@]}"