command:
  - python3
  - ${program}
  - --do_train
  - --do_eval
  - --use_scan
  - --gradient_checkpointing
  - --overwrite_output_dir
  - --predict_with_generate
  - --freeze_encoder
  - --streaming
  - --use_auth_token
  - --compilation_cache
  - --load_with_scan_weights
  - ${args}
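# ${program} and ${args} above are standard W&B sweep placeholders: the agent
# substitutes the `program` entry from the bottom of this file and the
# parameter values below. Grid search runs one job per swept value, ranked
# by lowest word error rate on the eval set.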
method: grid
metric:
  goal: minimize
  name: eval/wer
parameters:
  model_name_or_path:
    value: distil-whisper/large-32-2-ts-freeze-librispeech
  teacher_model_name_or_path:
    value: openai/whisper-large-v2
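  # The four fields below are `+`-separated parallel lists: entry i of each
  # gives the name, config, split and sample count for dataset i of the
  # training mixture. Reading `train_dataset_samples` as relative dataset
  # sizes used to weight the streamed interleaving is an assumption, based
  # on the 100/360/500 values matching the LibriSpeech split sizes.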
  train_dataset_name:
    value: librispeech_asr+librispeech_asr+librispeech_asr+common_voice_13_0+voxpopuli+ami-ihm+ami-sdm+peoples_speech-clean+tedlium+switchboard-data+gigaspeech-l+spgispeech
  train_dataset_config_name:
    value: all+all+all+en+en+ihm+sdm+clean+release3+all+l+L
  train_split_name:
    value: train.clean.100+train.clean.360+train.other.500+train+train+train+train+train+train+train+train+train
  train_dataset_samples:
    value: 100+360+500+2300+450+90+90+12000+450+3600+2500+5000
  eval_dataset_name:
    value: "distil-whisper/gigaspeech-l"
  eval_dataset_config_name:
    value: "l"
  cache_dir:
    value: /fsx/sanchit/cache
  dataset_cache_dir:
    value: /fsx/sanchit/cache
  output_dir:
    value: ./
  per_device_train_batch_size:
    value: 128
  per_device_eval_batch_size:
    value: 128
  dtype:
    value: bfloat16
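  # The only swept hyperparameter: five learning rates spanning 1e-5 to 1e-3,
  # giving five grid runs in total.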
  learning_rate:
    values:
      - 1e-3
      - 3e-4
      - 1e-4
      - 3e-5
      - 1e-5
  lr_scheduler_type:
    value: constant_with_warmup
  warmup_steps:
    value: 50
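  # Each run trains for 500 steps and evaluates once at the final step.
  # save_steps (501) exceeds max_steps (500), so no checkpoint is ever
  # written during a sweep run; presumably intentional for short sweeps.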
  max_steps:
    value: 500
  eval_steps:
    value: 500
  save_steps:
    value: 501
  dataloader_num_workers:
    value: 16
  logging_steps:
    value: 5
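  # Assumption: in run_distillation.py this filters out training utterances
  # whose pseudo-label WER against the ground-truth transcript exceeds 10%.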
  wer_threshold:
    value: 10
program: run_distillation.py
project: distil-whisper-sweeps
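# Not part of the original file: a sweep config like this is typically
# launched with the standard W&B CLI, e.g.
#   wandb sweep <this-file>.yaml    # registers the sweep, prints a sweep ID
#   wandb agent <entity>/distil-whisper-sweeps/<sweep-id>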