---
# Training configuration for a Conv-TasNet-style source-separation model (3 sources).
# NOTE(review): original file was flattened by a table export; section nesting below
# (scheduler/optimizer as top-level stanzas, not under `training:`) is the conventional
# layout — confirm against the loader code.

training:
  epochs: 200
  early_stopping: 20        # patience, in epochs
  batch_size: 48
  accumulation_steps: 2     # effective batch = 48 × 2 = 96
  learning_rate: 0.00005
  # Translated from French: "reduced: 1.0 caused instabilities".
  # NOTE(review): 30.0 > 1.0, so clipping was *loosened* — comment wording vs value
  # direction looks inconsistent; verify intent.
  grad_clip: 30.0
  save_every: 10            # checkpoint interval, in epochs
  num_workers: 4
  seed: 42

scheduler:
  name: warmup_cosine
  warmup_epochs: 5          # increased: more warmup with little data
  min_lr: 0.0000001

optimizer:
  name: adam
  weight_decay: 0.001

model:
  n_filters: 256
  filter_length: 16
  stride: 8
  n_blocks: 6
  n_repeats: 3
  bn_chan: 128
  hid_chan: 256
  skip_chan: 128
  norm_type: gLN
  mask_act: relu
  gradient_checkpointing: true

paths:
  checkpoint_dir: checkpoints_3src_2
  log_dir: logs_3src_2