tsw0411 commited on
Commit
8b8191d
·
verified ·
1 Parent(s): 6fba386

Upload folder using huggingface_hub

Browse files
Files changed (22) hide show
  1. .gitattributes +1 -0
  2. peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1314-epoch=0-last.ckpt +3 -0
  3. peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1314-epoch=0.ckpt +3 -0
  4. peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1348-epoch=0.ckpt +3 -0
  5. peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1382-epoch=0.ckpt +3 -0
  6. peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1458-epoch=0.ckpt +3 -0
  7. peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1609-epoch=0.ckpt +3 -0
  8. peav_sortformer_train/checkpoints/peav_sortformer_train.nemo +3 -0
  9. peav_sortformer_train/cmd-args.log +1 -0
  10. peav_sortformer_train/git-info.log +1 -0
  11. peav_sortformer_train/lightning_logs.txt +23 -0
  12. peav_sortformer_train/nemo_error_log.txt +8 -0
  13. peav_sortformer_train/nemo_log_globalrank-0_localrank-0.txt +254 -0
  14. peav_sortformer_train/nemo_log_globalrank-1_localrank-1.txt +248 -0
  15. peav_sortformer_train/nemo_log_globalrank-2_localrank-2.txt +248 -0
  16. peav_sortformer_train/nemo_log_globalrank-3_localrank-3.txt +248 -0
  17. peav_sortformer_train/nemo_log_globalrank-4_localrank-4.txt +248 -0
  18. peav_sortformer_train/nemo_log_globalrank-5_localrank-5.txt +248 -0
  19. peav_sortformer_train/nemo_log_globalrank-6_localrank-6.txt +248 -0
  20. peav_sortformer_train/nemo_log_globalrank-7_localrank-7.txt +248 -0
  21. peav_sortformer_train/version_0/events.out.tfevents.1776078731.260d0e88e8f5.93828.0 +3 -0
  22. peav_sortformer_train/version_0/hparams.yaml +161 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ peav_sortformer_train/checkpoints/peav_sortformer_train.nemo filter=lfs diff=lfs merge=lfs -text
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1314-epoch=0-last.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:168fcc556f339689efb16b22d8d6222a7eb76e971f0a19d81d9599f0b5c5ef96
3
+ size 1251559602
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1314-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ced2d1f4a2f261033d596a69d55b7097eb95c48ebee8517dc46d7fc89df98d3
3
+ size 1251559602
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1348-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52aecebedd98ce1bb126732390254bd12caaaed91698800be432849bcc370e0e
3
+ size 1251559602
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1382-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2345c2778a51324a64f7bafd924f804da4bf2918abb17186531f0d1f65da2333
3
+ size 1251559602
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1458-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab1023cbc9cad4e83bd782fc2e6bd9429facf9a4d18b7a2a88274b0bd7039ff9
3
+ size 1251559027
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1609-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:873ebc0a3928e07d4e42efc2c42c772e9a3a76de3c12619de3c57030316c2add
3
+ size 1251559474
peav_sortformer_train/checkpoints/peav_sortformer_train.nemo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbfcc298079572797332202d997c1a9db7a969b0b71a414903cc0965e8c31fff
3
+ size 500469760
peav_sortformer_train/cmd-args.log ADDED
@@ -0,0 +1 @@
 
 
1
+ scripts/peav_sortformer_train.py --config-path=../configs --config-name=peav_sortformer_4spk.yaml exp_manager.name=peav_sortformer_train exp_manager.exp_dir=./peav_sortformer_train
peav_sortformer_train/git-info.log ADDED
@@ -0,0 +1 @@
 
 
1
+ commit hash: b1e2e995a240deef69a6b3a9e40693059b976a6c
peav_sortformer_train/lightning_logs.txt ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
2
+
3
+ | Name | Type | Params | Mode
4
+ ---------------------------------------------------------------------
5
+ 0 | dac_vae | DacEncoderVAE | 27.7 M | train
6
+ 1 | data_proj | Linear | 99.1 K | train
7
+ 2 | encoder | PEAVEncoderWrapper | 89.1 M | train
8
+ 3 | sortformer_modules | SortformerModules | 187 K | train
9
+ 4 | transformer_encoder | TransformerEncoder | 8.0 M | train
10
+ 5 | loss | BCELoss | 0 | train
11
+ 6 | _accuracy_test | MultiBinaryAccuracy | 0 | train
12
+ 7 | _accuracy_train | MultiBinaryAccuracy | 0 | train
13
+ 8 | _accuracy_valid | MultiBinaryAccuracy | 0 | train
14
+ 9 | _accuracy_test_ats | MultiBinaryAccuracy | 0 | train
15
+ 10 | _accuracy_train_ats | MultiBinaryAccuracy | 0 | train
16
+ 11 | _accuracy_valid_ats | MultiBinaryAccuracy | 0 | train
17
+ ---------------------------------------------------------------------
18
+ 93.8 M Trainable params
19
+ 31.2 M Non-trainable params
20
+ 125 M Total params
21
+ 500.237 Total estimated model params size (MB)
22
+ 545 Modules in train mode
23
+ 0 Modules in eval mode
peav_sortformer_train/nemo_error_log.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ [NeMo W 2026-04-13 11:11:25 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
2
+ [NeMo W 2026-04-13 11:11:25 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
3
+ [NeMo W 2026-04-13 11:11:25 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
4
+ [NeMo W 2026-04-13 11:11:25 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
5
+ [NeMo W 2026-04-13 11:12:11 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
6
+
7
+ [NeMo W 2026-04-13 11:12:54 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
8
+
peav_sortformer_train/nemo_log_globalrank-0_localrank-0.txt ADDED
@@ -0,0 +1,254 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo I 2026-04-13 11:11:25 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
2
+ num_workers: 1
3
+ batch_size: 24
4
+ model:
5
+ sample_rate: 48000
6
+ pil_weight: 0.5
7
+ ats_weight: 0.5
8
+ max_num_of_spks: 4
9
+ peav_checkpoint: pe-av-small-16-frame
10
+ peav_pretrained: true
11
+ peav_freeze_layers: 0
12
+ rttm_unit_10ms_frame_count: 4
13
+ der_collar: 0.25
14
+ der_ignore_overlap: true
15
+ model_defaults:
16
+ fc_d_model: 768
17
+ tf_d_model: 192
18
+ train_ds:
19
+ manifest_filepath: null
20
+ sample_rate: ${model.sample_rate}
21
+ num_spks: ${model.max_num_of_spks}
22
+ session_len_sec: 45
23
+ shift_sec: 4
24
+ soft_label_thres: 0.5
25
+ soft_targets: false
26
+ labels: null
27
+ batch_size: ${batch_size}
28
+ shuffle: true
29
+ num_workers: ${num_workers}
30
+ validation_mode: false
31
+ use_hf_streaming: true
32
+ hf_dataset_path: humanify/real_dia_dataset
33
+ hf_configs:
34
+ - CHiME6
35
+ - Dipco
36
+ - ICSI
37
+ - M3SD
38
+ - NOTSOFAR
39
+ - aishell4
40
+ - aishell5
41
+ - alimeeting
42
+ - ami_ihm
43
+ - ami_sdm
44
+ - callhome
45
+ - msdwild
46
+ - voxconverse
47
+ hf_split: train
48
+ shuffle_seed: 42
49
+ shuffle_buffer_size: 4
50
+ prefetch_factor: 4
51
+ persistent_workers: true
52
+ prefetch_rows: 8
53
+ use_lhotse: false
54
+ use_bucketing: true
55
+ num_buckets: 10
56
+ bucket_duration_bins:
57
+ - 10
58
+ - 20
59
+ - 30
60
+ - 40
61
+ - 50
62
+ - 60
63
+ - 70
64
+ - 80
65
+ - 90
66
+ pin_memory: true
67
+ min_duration: 10
68
+ max_duration: 90
69
+ batch_duration: 400
70
+ quadratic_duration: 1200
71
+ bucket_buffer_size: 20000
72
+ window_stride: 0.04
73
+ subsampling_factor: 1
74
+ validation_ds:
75
+ manifest_filepath: null
76
+ is_tarred: false
77
+ tarred_audio_filepaths: null
78
+ sample_rate: ${model.sample_rate}
79
+ num_spks: ${model.max_num_of_spks}
80
+ session_len_sec: 45
81
+ shift_sec: 4
82
+ soft_label_thres: 0.5
83
+ soft_targets: false
84
+ labels: null
85
+ batch_size: ${batch_size}
86
+ shuffle: false
87
+ num_workers: ${num_workers}
88
+ validation_mode: true
89
+ use_hf_streaming: true
90
+ hf_dataset_path: humanify/real_dia_dataset
91
+ hf_configs:
92
+ - alm_benchmark
93
+ hf_split: train
94
+ shuffle_seed: 42
95
+ shuffle_buffer_size: 100
96
+ prefetch_factor: 4
97
+ persistent_workers: true
98
+ prefetch_rows: 4
99
+ use_lhotse: false
100
+ use_bucketing: false
101
+ drop_last: false
102
+ pin_memory: true
103
+ window_stride: 0.04
104
+ subsampling_factor: 1
105
+ test_ds:
106
+ manifest_filepath: null
107
+ is_tarred: false
108
+ tarred_audio_filepaths: null
109
+ sample_rate: ${model.sample_rate}
110
+ num_spks: ${model.max_num_of_spks}
111
+ session_len_sec: 90
112
+ soft_label_thres: 0.5
113
+ soft_targets: false
114
+ labels: null
115
+ batch_size: ${batch_size}
116
+ shuffle: false
117
+ seq_eval_mode: true
118
+ num_workers: ${num_workers}
119
+ validation_mode: true
120
+ use_lhotse: false
121
+ use_bucketing: false
122
+ drop_last: false
123
+ pin_memory: true
124
+ window_stride: 0.04
125
+ subsampling_factor: 1
126
+ sortformer_modules:
127
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
128
+ num_spks: ${model.max_num_of_spks}
129
+ dropout_rate: 0.1
130
+ fc_d_model: ${model.model_defaults.fc_d_model}
131
+ tf_d_model: ${model.model_defaults.tf_d_model}
132
+ subsampling_factor: 1
133
+ encoder:
134
+ d_model: ${model.model_defaults.fc_d_model}
135
+ subsampling_factor: 1
136
+ transformer_encoder:
137
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
138
+ num_layers: 18
139
+ hidden_size: ${model.model_defaults.tf_d_model}
140
+ inner_size: 768
141
+ num_attention_heads: 8
142
+ attn_score_dropout: 0.5
143
+ attn_layer_dropout: 0.5
144
+ ffn_dropout: 0.5
145
+ hidden_act: relu
146
+ pre_ln: false
147
+ pre_ln_final_layer_norm: true
148
+ loss:
149
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
150
+ weight: null
151
+ reduction: mean
152
+ lr: 2.0e-05
153
+ optim:
154
+ name: adamw
155
+ lr: ${model.lr}
156
+ betas:
157
+ - 0.9
158
+ - 0.98
159
+ weight_decay: 0.001
160
+ sched:
161
+ name: InverseSquareRootAnnealing
162
+ warmup_steps: 2500
163
+ warmup_ratio: null
164
+ min_lr: 1.0e-06
165
+ trainer:
166
+ devices: 8
167
+ accelerator: gpu
168
+ precision: bf16-mixed
169
+ max_epochs: -1
170
+ max_steps: 16000
171
+ num_nodes: 1
172
+ strategy: ddp_find_unused_parameters_true
173
+ accumulate_grad_batches: 1
174
+ deterministic: false
175
+ enable_checkpointing: false
176
+ logger: false
177
+ log_every_n_steps: 1
178
+ val_check_interval: 2000
179
+ num_sanity_val_steps: 0
180
+ exp_manager:
181
+ use_datetime_version: false
182
+ exp_dir: ./peav_sortformer_train
183
+ name: peav_sortformer_train
184
+ resume_if_exists: true
185
+ resume_from_checkpoint: null
186
+ resume_ignore_no_checkpoint: true
187
+ create_tensorboard_logger: true
188
+ create_checkpoint_callback: true
189
+ create_wandb_logger: false
190
+ checkpoint_callback_params:
191
+ monitor: val_der
192
+ mode: min
193
+ save_top_k: 5
194
+ every_n_train_steps: 2000
195
+ every_n_epochs: 0
196
+ wandb_logger_kwargs:
197
+ resume: true
198
+ name: null
199
+ project: null
200
+
201
+ [NeMo I 2026-04-13 11:11:25 exp_manager:594] ExpManager schema
202
+ [NeMo I 2026-04-13 11:11:25 exp_manager:595] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'run_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1}, 'create_early_stopping_callback': False, 'create_ipl_epoch_stopper_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'ipl_epoch_stopper_callback_params': {'enable_stop': True, 'stop_every_n_epochs': 1}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
203
+ [NeMo W 2026-04-13 11:11:25 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
204
+ [NeMo W 2026-04-13 11:11:25 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
205
+ [NeMo I 2026-04-13 11:11:25 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
206
+ [NeMo I 2026-04-13 11:11:25 exp_manager:1262] TensorboardLogger has been set up
207
+ [NeMo W 2026-04-13 11:11:25 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
208
+ [NeMo I 2026-04-13 11:11:25 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
209
+ [NeMo I 2026-04-13 11:11:25 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
210
+ [NeMo I 2026-04-13 11:11:25 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
211
+ [NeMo W 2026-04-13 11:11:25 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
212
+ [NeMo I 2026-04-13 11:11:25 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
213
+ [NeMo I 2026-04-13 11:11:30 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
214
+ [NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
215
+ Parameter Group 0
216
+ amsgrad: False
217
+ betas: (0.9, 0.98)
218
+ capturable: False
219
+ decoupled_weight_decay: True
220
+ differentiable: False
221
+ eps: 1e-08
222
+ foreach: None
223
+ fused: None
224
+ lr: 2e-05
225
+ maximize: False
226
+ weight_decay: 0.001
227
+ )
228
+ [NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7828e016d130>"
229
+ will be used during training (effective maximum steps = 16000) -
230
+ Parameters :
231
+ (warmup_steps: 2500
232
+ warmup_ratio: null
233
+ min_lr: 1.0e-06
234
+ max_steps: 16000
235
+ )
236
+ [NeMo W 2026-04-13 11:12:11 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
237
+
238
+ [NeMo W 2026-04-13 11:12:54 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
239
+
240
+ [NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118162.
241
+ [NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.535194.
242
+ [NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395654.
243
+ [NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.733094.
244
+ [NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493985.
245
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072822.
246
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771407.
247
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.213656.
248
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.942046.
249
+ [NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.637489.
250
+ [NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543324.
251
+ [NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.729116.
252
+ [NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.408545.
253
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822026.
254
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9141016.
peav_sortformer_train/nemo_log_globalrank-1_localrank-1.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
2
+ num_workers: 1
3
+ batch_size: 24
4
+ model:
5
+ sample_rate: 48000
6
+ pil_weight: 0.5
7
+ ats_weight: 0.5
8
+ max_num_of_spks: 4
9
+ peav_checkpoint: pe-av-small-16-frame
10
+ peav_pretrained: true
11
+ peav_freeze_layers: 0
12
+ rttm_unit_10ms_frame_count: 4
13
+ der_collar: 0.25
14
+ der_ignore_overlap: true
15
+ model_defaults:
16
+ fc_d_model: 768
17
+ tf_d_model: 192
18
+ train_ds:
19
+ manifest_filepath: null
20
+ sample_rate: ${model.sample_rate}
21
+ num_spks: ${model.max_num_of_spks}
22
+ session_len_sec: 45
23
+ shift_sec: 4
24
+ soft_label_thres: 0.5
25
+ soft_targets: false
26
+ labels: null
27
+ batch_size: ${batch_size}
28
+ shuffle: true
29
+ num_workers: ${num_workers}
30
+ validation_mode: false
31
+ use_hf_streaming: true
32
+ hf_dataset_path: humanify/real_dia_dataset
33
+ hf_configs:
34
+ - CHiME6
35
+ - Dipco
36
+ - ICSI
37
+ - M3SD
38
+ - NOTSOFAR
39
+ - aishell4
40
+ - aishell5
41
+ - alimeeting
42
+ - ami_ihm
43
+ - ami_sdm
44
+ - callhome
45
+ - msdwild
46
+ - voxconverse
47
+ hf_split: train
48
+ shuffle_seed: 42
49
+ shuffle_buffer_size: 4
50
+ prefetch_factor: 4
51
+ persistent_workers: true
52
+ prefetch_rows: 8
53
+ use_lhotse: false
54
+ use_bucketing: true
55
+ num_buckets: 10
56
+ bucket_duration_bins:
57
+ - 10
58
+ - 20
59
+ - 30
60
+ - 40
61
+ - 50
62
+ - 60
63
+ - 70
64
+ - 80
65
+ - 90
66
+ pin_memory: true
67
+ min_duration: 10
68
+ max_duration: 90
69
+ batch_duration: 400
70
+ quadratic_duration: 1200
71
+ bucket_buffer_size: 20000
72
+ window_stride: 0.04
73
+ subsampling_factor: 1
74
+ validation_ds:
75
+ manifest_filepath: null
76
+ is_tarred: false
77
+ tarred_audio_filepaths: null
78
+ sample_rate: ${model.sample_rate}
79
+ num_spks: ${model.max_num_of_spks}
80
+ session_len_sec: 45
81
+ shift_sec: 4
82
+ soft_label_thres: 0.5
83
+ soft_targets: false
84
+ labels: null
85
+ batch_size: ${batch_size}
86
+ shuffle: false
87
+ num_workers: ${num_workers}
88
+ validation_mode: true
89
+ use_hf_streaming: true
90
+ hf_dataset_path: humanify/real_dia_dataset
91
+ hf_configs:
92
+ - alm_benchmark
93
+ hf_split: train
94
+ shuffle_seed: 42
95
+ shuffle_buffer_size: 100
96
+ prefetch_factor: 4
97
+ persistent_workers: true
98
+ prefetch_rows: 4
99
+ use_lhotse: false
100
+ use_bucketing: false
101
+ drop_last: false
102
+ pin_memory: true
103
+ window_stride: 0.04
104
+ subsampling_factor: 1
105
+ test_ds:
106
+ manifest_filepath: null
107
+ is_tarred: false
108
+ tarred_audio_filepaths: null
109
+ sample_rate: ${model.sample_rate}
110
+ num_spks: ${model.max_num_of_spks}
111
+ session_len_sec: 90
112
+ soft_label_thres: 0.5
113
+ soft_targets: false
114
+ labels: null
115
+ batch_size: ${batch_size}
116
+ shuffle: false
117
+ seq_eval_mode: true
118
+ num_workers: ${num_workers}
119
+ validation_mode: true
120
+ use_lhotse: false
121
+ use_bucketing: false
122
+ drop_last: false
123
+ pin_memory: true
124
+ window_stride: 0.04
125
+ subsampling_factor: 1
126
+ sortformer_modules:
127
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
128
+ num_spks: ${model.max_num_of_spks}
129
+ dropout_rate: 0.1
130
+ fc_d_model: ${model.model_defaults.fc_d_model}
131
+ tf_d_model: ${model.model_defaults.tf_d_model}
132
+ subsampling_factor: 1
133
+ encoder:
134
+ d_model: ${model.model_defaults.fc_d_model}
135
+ subsampling_factor: 1
136
+ transformer_encoder:
137
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
138
+ num_layers: 18
139
+ hidden_size: ${model.model_defaults.tf_d_model}
140
+ inner_size: 768
141
+ num_attention_heads: 8
142
+ attn_score_dropout: 0.5
143
+ attn_layer_dropout: 0.5
144
+ ffn_dropout: 0.5
145
+ hidden_act: relu
146
+ pre_ln: false
147
+ pre_ln_final_layer_norm: true
148
+ loss:
149
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
150
+ weight: null
151
+ reduction: mean
152
+ lr: 2.0e-05
153
+ optim:
154
+ name: adamw
155
+ lr: ${model.lr}
156
+ betas:
157
+ - 0.9
158
+ - 0.98
159
+ weight_decay: 0.001
160
+ sched:
161
+ name: InverseSquareRootAnnealing
162
+ warmup_steps: 2500
163
+ warmup_ratio: null
164
+ min_lr: 1.0e-06
165
+ trainer:
166
+ devices: 8
167
+ accelerator: gpu
168
+ precision: bf16-mixed
169
+ max_epochs: -1
170
+ max_steps: 16000
171
+ num_nodes: 1
172
+ strategy: ddp_find_unused_parameters_true
173
+ accumulate_grad_batches: 1
174
+ deterministic: false
175
+ enable_checkpointing: false
176
+ logger: false
177
+ log_every_n_steps: 1
178
+ val_check_interval: 2000
179
+ num_sanity_val_steps: 0
180
+ exp_manager:
181
+ use_datetime_version: false
182
+ exp_dir: ./peav_sortformer_train
183
+ name: peav_sortformer_train
184
+ resume_if_exists: true
185
+ resume_from_checkpoint: null
186
+ resume_ignore_no_checkpoint: true
187
+ create_tensorboard_logger: true
188
+ create_checkpoint_callback: true
189
+ create_wandb_logger: false
190
+ checkpoint_callback_params:
191
+ monitor: val_der
192
+ mode: min
193
+ save_top_k: 5
194
+ every_n_train_steps: 2000
195
+ every_n_epochs: 0
196
+ wandb_logger_kwargs:
197
+ resume: true
198
+ name: null
199
+ project: null
200
+
201
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
202
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
203
+ [NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
204
+ [NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
205
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
206
+ [NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
207
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
208
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
209
+ [NeMo W 2026-04-13 11:11:44 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
210
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
211
+ [NeMo I 2026-04-13 11:12:10 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
212
+ [NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
213
+ Parameter Group 0
214
+ amsgrad: False
215
+ betas: (0.9, 0.98)
216
+ capturable: False
217
+ decoupled_weight_decay: True
218
+ differentiable: False
219
+ eps: 1e-08
220
+ foreach: None
221
+ fused: None
222
+ lr: 2e-05
223
+ maximize: False
224
+ weight_decay: 0.001
225
+ )
226
+ [NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x79ec3de84830>"
227
+ will be used during training (effective maximum steps = 16000) -
228
+ Parameters :
229
+ (warmup_steps: 2500
230
+ warmup_ratio: null
231
+ min_lr: 1.0e-06
232
+ max_steps: 16000
233
+ )
234
+ [NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118205.
235
+ [NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351932.
236
+ [NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395537.
237
+ [NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7330906.
238
+ [NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493804.
239
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072822.
240
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771307.
241
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.213653.
242
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420345.
243
+ [NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374886.
244
+ [NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543212.
245
+ [NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291036.
246
+ [NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085367.
247
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822011.
248
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.914093.
peav_sortformer_train/nemo_log_globalrank-2_localrank-2.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
2
+ num_workers: 1
3
+ batch_size: 24
4
+ model:
5
+ sample_rate: 48000
6
+ pil_weight: 0.5
7
+ ats_weight: 0.5
8
+ max_num_of_spks: 4
9
+ peav_checkpoint: pe-av-small-16-frame
10
+ peav_pretrained: true
11
+ peav_freeze_layers: 0
12
+ rttm_unit_10ms_frame_count: 4
13
+ der_collar: 0.25
14
+ der_ignore_overlap: true
15
+ model_defaults:
16
+ fc_d_model: 768
17
+ tf_d_model: 192
18
+ train_ds:
19
+ manifest_filepath: null
20
+ sample_rate: ${model.sample_rate}
21
+ num_spks: ${model.max_num_of_spks}
22
+ session_len_sec: 45
23
+ shift_sec: 4
24
+ soft_label_thres: 0.5
25
+ soft_targets: false
26
+ labels: null
27
+ batch_size: ${batch_size}
28
+ shuffle: true
29
+ num_workers: ${num_workers}
30
+ validation_mode: false
31
+ use_hf_streaming: true
32
+ hf_dataset_path: humanify/real_dia_dataset
33
+ hf_configs:
34
+ - CHiME6
35
+ - Dipco
36
+ - ICSI
37
+ - M3SD
38
+ - NOTSOFAR
39
+ - aishell4
40
+ - aishell5
41
+ - alimeeting
42
+ - ami_ihm
43
+ - ami_sdm
44
+ - callhome
45
+ - msdwild
46
+ - voxconverse
47
+ hf_split: train
48
+ shuffle_seed: 42
49
+ shuffle_buffer_size: 4
50
+ prefetch_factor: 4
51
+ persistent_workers: true
52
+ prefetch_rows: 8
53
+ use_lhotse: false
54
+ use_bucketing: true
55
+ num_buckets: 10
56
+ bucket_duration_bins:
57
+ - 10
58
+ - 20
59
+ - 30
60
+ - 40
61
+ - 50
62
+ - 60
63
+ - 70
64
+ - 80
65
+ - 90
66
+ pin_memory: true
67
+ min_duration: 10
68
+ max_duration: 90
69
+ batch_duration: 400
70
+ quadratic_duration: 1200
71
+ bucket_buffer_size: 20000
72
+ window_stride: 0.04
73
+ subsampling_factor: 1
74
+ validation_ds:
75
+ manifest_filepath: null
76
+ is_tarred: false
77
+ tarred_audio_filepaths: null
78
+ sample_rate: ${model.sample_rate}
79
+ num_spks: ${model.max_num_of_spks}
80
+ session_len_sec: 45
81
+ shift_sec: 4
82
+ soft_label_thres: 0.5
83
+ soft_targets: false
84
+ labels: null
85
+ batch_size: ${batch_size}
86
+ shuffle: false
87
+ num_workers: ${num_workers}
88
+ validation_mode: true
89
+ use_hf_streaming: true
90
+ hf_dataset_path: humanify/real_dia_dataset
91
+ hf_configs:
92
+ - alm_benchmark
93
+ hf_split: train
94
+ shuffle_seed: 42
95
+ shuffle_buffer_size: 100
96
+ prefetch_factor: 4
97
+ persistent_workers: true
98
+ prefetch_rows: 4
99
+ use_lhotse: false
100
+ use_bucketing: false
101
+ drop_last: false
102
+ pin_memory: true
103
+ window_stride: 0.04
104
+ subsampling_factor: 1
105
+ test_ds:
106
+ manifest_filepath: null
107
+ is_tarred: false
108
+ tarred_audio_filepaths: null
109
+ sample_rate: ${model.sample_rate}
110
+ num_spks: ${model.max_num_of_spks}
111
+ session_len_sec: 90
112
+ soft_label_thres: 0.5
113
+ soft_targets: false
114
+ labels: null
115
+ batch_size: ${batch_size}
116
+ shuffle: false
117
+ seq_eval_mode: true
118
+ num_workers: ${num_workers}
119
+ validation_mode: true
120
+ use_lhotse: false
121
+ use_bucketing: false
122
+ drop_last: false
123
+ pin_memory: true
124
+ window_stride: 0.04
125
+ subsampling_factor: 1
126
+ sortformer_modules:
127
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
128
+ num_spks: ${model.max_num_of_spks}
129
+ dropout_rate: 0.1
130
+ fc_d_model: ${model.model_defaults.fc_d_model}
131
+ tf_d_model: ${model.model_defaults.tf_d_model}
132
+ subsampling_factor: 1
133
+ encoder:
134
+ d_model: ${model.model_defaults.fc_d_model}
135
+ subsampling_factor: 1
136
+ transformer_encoder:
137
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
138
+ num_layers: 18
139
+ hidden_size: ${model.model_defaults.tf_d_model}
140
+ inner_size: 768
141
+ num_attention_heads: 8
142
+ attn_score_dropout: 0.5
143
+ attn_layer_dropout: 0.5
144
+ ffn_dropout: 0.5
145
+ hidden_act: relu
146
+ pre_ln: false
147
+ pre_ln_final_layer_norm: true
148
+ loss:
149
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
150
+ weight: null
151
+ reduction: mean
152
+ lr: 2.0e-05
153
+ optim:
154
+ name: adamw
155
+ lr: ${model.lr}
156
+ betas:
157
+ - 0.9
158
+ - 0.98
159
+ weight_decay: 0.001
160
+ sched:
161
+ name: InverseSquareRootAnnealing
162
+ warmup_steps: 2500
163
+ warmup_ratio: null
164
+ min_lr: 1.0e-06
165
+ trainer:
166
+ devices: 8
167
+ accelerator: gpu
168
+ precision: bf16-mixed
169
+ max_epochs: -1
170
+ max_steps: 16000
171
+ num_nodes: 1
172
+ strategy: ddp_find_unused_parameters_true
173
+ accumulate_grad_batches: 1
174
+ deterministic: false
175
+ enable_checkpointing: false
176
+ logger: false
177
+ log_every_n_steps: 1
178
+ val_check_interval: 2000
179
+ num_sanity_val_steps: 0
180
+ exp_manager:
181
+ use_datetime_version: false
182
+ exp_dir: ./peav_sortformer_train
183
+ name: peav_sortformer_train
184
+ resume_if_exists: true
185
+ resume_from_checkpoint: null
186
+ resume_ignore_no_checkpoint: true
187
+ create_tensorboard_logger: true
188
+ create_checkpoint_callback: true
189
+ create_wandb_logger: false
190
+ checkpoint_callback_params:
191
+ monitor: val_der
192
+ mode: min
193
+ save_top_k: 5
194
+ every_n_train_steps: 2000
195
+ every_n_epochs: 0
196
+ wandb_logger_kwargs:
197
+ resume: true
198
+ name: null
199
+ project: null
200
+
201
+ [NeMo W 2026-04-13 11:11:40 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
202
+ [NeMo W 2026-04-13 11:11:40 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
203
+ [NeMo I 2026-04-13 11:11:40 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
204
+ [NeMo I 2026-04-13 11:11:40 exp_manager:1262] TensorboardLogger has been set up
205
+ [NeMo W 2026-04-13 11:11:40 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
206
+ [NeMo I 2026-04-13 11:11:40 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
207
+ [NeMo I 2026-04-13 11:11:45 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
208
+ [NeMo I 2026-04-13 11:11:45 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
209
+ [NeMo W 2026-04-13 11:11:45 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
210
+ [NeMo I 2026-04-13 11:11:45 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
211
+ [NeMo I 2026-04-13 11:12:07 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
212
+ [NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
213
+ Parameter Group 0
214
+ amsgrad: False
215
+ betas: (0.9, 0.98)
216
+ capturable: False
217
+ decoupled_weight_decay: True
218
+ differentiable: False
219
+ eps: 1e-08
220
+ foreach: None
221
+ fused: None
222
+ lr: 2e-05
223
+ maximize: False
224
+ weight_decay: 0.001
225
+ )
226
+ [NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x73feafc4b740>"
227
+ will be used during training (effective maximum steps = 16000) -
228
+ Parameters :
229
+ (warmup_steps: 2500
230
+ warmup_ratio: null
231
+ min_lr: 1.0e-06
232
+ max_steps: 16000
233
+ )
234
+ [NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118193.
235
+ [NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351932.
236
+ [NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395525.
237
+ [NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7330956.
238
+ [NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493802.
239
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.307285.
240
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771292.
241
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136545.
242
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420338.
243
+ [NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374915.
244
+ [NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543217.
245
+ [NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291129.
246
+ [NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085355.
247
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822057.
248
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140923.
peav_sortformer_train/nemo_log_globalrank-3_localrank-3.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo I 2026-04-13 11:11:38 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
2
+ num_workers: 1
3
+ batch_size: 24
4
+ model:
5
+ sample_rate: 48000
6
+ pil_weight: 0.5
7
+ ats_weight: 0.5
8
+ max_num_of_spks: 4
9
+ peav_checkpoint: pe-av-small-16-frame
10
+ peav_pretrained: true
11
+ peav_freeze_layers: 0
12
+ rttm_unit_10ms_frame_count: 4
13
+ der_collar: 0.25
14
+ der_ignore_overlap: true
15
+ model_defaults:
16
+ fc_d_model: 768
17
+ tf_d_model: 192
18
+ train_ds:
19
+ manifest_filepath: null
20
+ sample_rate: ${model.sample_rate}
21
+ num_spks: ${model.max_num_of_spks}
22
+ session_len_sec: 45
23
+ shift_sec: 4
24
+ soft_label_thres: 0.5
25
+ soft_targets: false
26
+ labels: null
27
+ batch_size: ${batch_size}
28
+ shuffle: true
29
+ num_workers: ${num_workers}
30
+ validation_mode: false
31
+ use_hf_streaming: true
32
+ hf_dataset_path: humanify/real_dia_dataset
33
+ hf_configs:
34
+ - CHiME6
35
+ - Dipco
36
+ - ICSI
37
+ - M3SD
38
+ - NOTSOFAR
39
+ - aishell4
40
+ - aishell5
41
+ - alimeeting
42
+ - ami_ihm
43
+ - ami_sdm
44
+ - callhome
45
+ - msdwild
46
+ - voxconverse
47
+ hf_split: train
48
+ shuffle_seed: 42
49
+ shuffle_buffer_size: 4
50
+ prefetch_factor: 4
51
+ persistent_workers: true
52
+ prefetch_rows: 8
53
+ use_lhotse: false
54
+ use_bucketing: true
55
+ num_buckets: 10
56
+ bucket_duration_bins:
57
+ - 10
58
+ - 20
59
+ - 30
60
+ - 40
61
+ - 50
62
+ - 60
63
+ - 70
64
+ - 80
65
+ - 90
66
+ pin_memory: true
67
+ min_duration: 10
68
+ max_duration: 90
69
+ batch_duration: 400
70
+ quadratic_duration: 1200
71
+ bucket_buffer_size: 20000
72
+ window_stride: 0.04
73
+ subsampling_factor: 1
74
+ validation_ds:
75
+ manifest_filepath: null
76
+ is_tarred: false
77
+ tarred_audio_filepaths: null
78
+ sample_rate: ${model.sample_rate}
79
+ num_spks: ${model.max_num_of_spks}
80
+ session_len_sec: 45
81
+ shift_sec: 4
82
+ soft_label_thres: 0.5
83
+ soft_targets: false
84
+ labels: null
85
+ batch_size: ${batch_size}
86
+ shuffle: false
87
+ num_workers: ${num_workers}
88
+ validation_mode: true
89
+ use_hf_streaming: true
90
+ hf_dataset_path: humanify/real_dia_dataset
91
+ hf_configs:
92
+ - alm_benchmark
93
+ hf_split: train
94
+ shuffle_seed: 42
95
+ shuffle_buffer_size: 100
96
+ prefetch_factor: 4
97
+ persistent_workers: true
98
+ prefetch_rows: 4
99
+ use_lhotse: false
100
+ use_bucketing: false
101
+ drop_last: false
102
+ pin_memory: true
103
+ window_stride: 0.04
104
+ subsampling_factor: 1
105
+ test_ds:
106
+ manifest_filepath: null
107
+ is_tarred: false
108
+ tarred_audio_filepaths: null
109
+ sample_rate: ${model.sample_rate}
110
+ num_spks: ${model.max_num_of_spks}
111
+ session_len_sec: 90
112
+ soft_label_thres: 0.5
113
+ soft_targets: false
114
+ labels: null
115
+ batch_size: ${batch_size}
116
+ shuffle: false
117
+ seq_eval_mode: true
118
+ num_workers: ${num_workers}
119
+ validation_mode: true
120
+ use_lhotse: false
121
+ use_bucketing: false
122
+ drop_last: false
123
+ pin_memory: true
124
+ window_stride: 0.04
125
+ subsampling_factor: 1
126
+ sortformer_modules:
127
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
128
+ num_spks: ${model.max_num_of_spks}
129
+ dropout_rate: 0.1
130
+ fc_d_model: ${model.model_defaults.fc_d_model}
131
+ tf_d_model: ${model.model_defaults.tf_d_model}
132
+ subsampling_factor: 1
133
+ encoder:
134
+ d_model: ${model.model_defaults.fc_d_model}
135
+ subsampling_factor: 1
136
+ transformer_encoder:
137
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
138
+ num_layers: 18
139
+ hidden_size: ${model.model_defaults.tf_d_model}
140
+ inner_size: 768
141
+ num_attention_heads: 8
142
+ attn_score_dropout: 0.5
143
+ attn_layer_dropout: 0.5
144
+ ffn_dropout: 0.5
145
+ hidden_act: relu
146
+ pre_ln: false
147
+ pre_ln_final_layer_norm: true
148
+ loss:
149
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
150
+ weight: null
151
+ reduction: mean
152
+ lr: 2.0e-05
153
+ optim:
154
+ name: adamw
155
+ lr: ${model.lr}
156
+ betas:
157
+ - 0.9
158
+ - 0.98
159
+ weight_decay: 0.001
160
+ sched:
161
+ name: InverseSquareRootAnnealing
162
+ warmup_steps: 2500
163
+ warmup_ratio: null
164
+ min_lr: 1.0e-06
165
+ trainer:
166
+ devices: 8
167
+ accelerator: gpu
168
+ precision: bf16-mixed
169
+ max_epochs: -1
170
+ max_steps: 16000
171
+ num_nodes: 1
172
+ strategy: ddp_find_unused_parameters_true
173
+ accumulate_grad_batches: 1
174
+ deterministic: false
175
+ enable_checkpointing: false
176
+ logger: false
177
+ log_every_n_steps: 1
178
+ val_check_interval: 2000
179
+ num_sanity_val_steps: 0
180
+ exp_manager:
181
+ use_datetime_version: false
182
+ exp_dir: ./peav_sortformer_train
183
+ name: peav_sortformer_train
184
+ resume_if_exists: true
185
+ resume_from_checkpoint: null
186
+ resume_ignore_no_checkpoint: true
187
+ create_tensorboard_logger: true
188
+ create_checkpoint_callback: true
189
+ create_wandb_logger: false
190
+ checkpoint_callback_params:
191
+ monitor: val_der
192
+ mode: min
193
+ save_top_k: 5
194
+ every_n_train_steps: 2000
195
+ every_n_epochs: 0
196
+ wandb_logger_kwargs:
197
+ resume: true
198
+ name: null
199
+ project: null
200
+
201
+ [NeMo W 2026-04-13 11:11:38 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
202
+ [NeMo W 2026-04-13 11:11:38 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
203
+ [NeMo I 2026-04-13 11:11:38 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
204
+ [NeMo I 2026-04-13 11:11:38 exp_manager:1262] TensorboardLogger has been set up
205
+ [NeMo W 2026-04-13 11:11:38 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
206
+ [NeMo I 2026-04-13 11:11:38 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
207
+ [NeMo I 2026-04-13 11:11:43 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
208
+ [NeMo I 2026-04-13 11:11:43 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
209
+ [NeMo W 2026-04-13 11:11:43 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
210
+ [NeMo I 2026-04-13 11:11:43 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
211
+ [NeMo I 2026-04-13 11:11:53 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
212
+ [NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
213
+ Parameter Group 0
214
+ amsgrad: False
215
+ betas: (0.9, 0.98)
216
+ capturable: False
217
+ decoupled_weight_decay: True
218
+ differentiable: False
219
+ eps: 1e-08
220
+ foreach: None
221
+ fused: None
222
+ lr: 2e-05
223
+ maximize: False
224
+ weight_decay: 0.001
225
+ )
226
+ [NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x713d95e20740>"
227
+ will be used during training (effective maximum steps = 16000) -
228
+ Parameters :
229
+ (warmup_steps: 2500
230
+ warmup_ratio: null
231
+ min_lr: 1.0e-06
232
+ max_steps: 16000
233
+ )
234
+ [NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118226.
235
+ [NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351937.
236
+ [NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395597.
237
+ [NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.733098.
238
+ [NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493806.
239
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072832.
240
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.977131.
241
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136538.
242
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420352.
243
+ [NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374905.
244
+ [NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543234.
245
+ [NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291148.
246
+ [NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.408538.
247
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.182203.
248
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140947.
peav_sortformer_train/nemo_log_globalrank-4_localrank-4.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
2
+ num_workers: 1
3
+ batch_size: 24
4
+ model:
5
+ sample_rate: 48000
6
+ pil_weight: 0.5
7
+ ats_weight: 0.5
8
+ max_num_of_spks: 4
9
+ peav_checkpoint: pe-av-small-16-frame
10
+ peav_pretrained: true
11
+ peav_freeze_layers: 0
12
+ rttm_unit_10ms_frame_count: 4
13
+ der_collar: 0.25
14
+ der_ignore_overlap: true
15
+ model_defaults:
16
+ fc_d_model: 768
17
+ tf_d_model: 192
18
+ train_ds:
19
+ manifest_filepath: null
20
+ sample_rate: ${model.sample_rate}
21
+ num_spks: ${model.max_num_of_spks}
22
+ session_len_sec: 45
23
+ shift_sec: 4
24
+ soft_label_thres: 0.5
25
+ soft_targets: false
26
+ labels: null
27
+ batch_size: ${batch_size}
28
+ shuffle: true
29
+ num_workers: ${num_workers}
30
+ validation_mode: false
31
+ use_hf_streaming: true
32
+ hf_dataset_path: humanify/real_dia_dataset
33
+ hf_configs:
34
+ - CHiME6
35
+ - Dipco
36
+ - ICSI
37
+ - M3SD
38
+ - NOTSOFAR
39
+ - aishell4
40
+ - aishell5
41
+ - alimeeting
42
+ - ami_ihm
43
+ - ami_sdm
44
+ - callhome
45
+ - msdwild
46
+ - voxconverse
47
+ hf_split: train
48
+ shuffle_seed: 42
49
+ shuffle_buffer_size: 4
50
+ prefetch_factor: 4
51
+ persistent_workers: true
52
+ prefetch_rows: 8
53
+ use_lhotse: false
54
+ use_bucketing: true
55
+ num_buckets: 10
56
+ bucket_duration_bins:
57
+ - 10
58
+ - 20
59
+ - 30
60
+ - 40
61
+ - 50
62
+ - 60
63
+ - 70
64
+ - 80
65
+ - 90
66
+ pin_memory: true
67
+ min_duration: 10
68
+ max_duration: 90
69
+ batch_duration: 400
70
+ quadratic_duration: 1200
71
+ bucket_buffer_size: 20000
72
+ window_stride: 0.04
73
+ subsampling_factor: 1
74
+ validation_ds:
75
+ manifest_filepath: null
76
+ is_tarred: false
77
+ tarred_audio_filepaths: null
78
+ sample_rate: ${model.sample_rate}
79
+ num_spks: ${model.max_num_of_spks}
80
+ session_len_sec: 45
81
+ shift_sec: 4
82
+ soft_label_thres: 0.5
83
+ soft_targets: false
84
+ labels: null
85
+ batch_size: ${batch_size}
86
+ shuffle: false
87
+ num_workers: ${num_workers}
88
+ validation_mode: true
89
+ use_hf_streaming: true
90
+ hf_dataset_path: humanify/real_dia_dataset
91
+ hf_configs:
92
+ - alm_benchmark
93
+ hf_split: train
94
+ shuffle_seed: 42
95
+ shuffle_buffer_size: 100
96
+ prefetch_factor: 4
97
+ persistent_workers: true
98
+ prefetch_rows: 4
99
+ use_lhotse: false
100
+ use_bucketing: false
101
+ drop_last: false
102
+ pin_memory: true
103
+ window_stride: 0.04
104
+ subsampling_factor: 1
105
+ test_ds:
106
+ manifest_filepath: null
107
+ is_tarred: false
108
+ tarred_audio_filepaths: null
109
+ sample_rate: ${model.sample_rate}
110
+ num_spks: ${model.max_num_of_spks}
111
+ session_len_sec: 90
112
+ soft_label_thres: 0.5
113
+ soft_targets: false
114
+ labels: null
115
+ batch_size: ${batch_size}
116
+ shuffle: false
117
+ seq_eval_mode: true
118
+ num_workers: ${num_workers}
119
+ validation_mode: true
120
+ use_lhotse: false
121
+ use_bucketing: false
122
+ drop_last: false
123
+ pin_memory: true
124
+ window_stride: 0.04
125
+ subsampling_factor: 1
126
+ sortformer_modules:
127
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
128
+ num_spks: ${model.max_num_of_spks}
129
+ dropout_rate: 0.1
130
+ fc_d_model: ${model.model_defaults.fc_d_model}
131
+ tf_d_model: ${model.model_defaults.tf_d_model}
132
+ subsampling_factor: 1
133
+ encoder:
134
+ d_model: ${model.model_defaults.fc_d_model}
135
+ subsampling_factor: 1
136
+ transformer_encoder:
137
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
138
+ num_layers: 18
139
+ hidden_size: ${model.model_defaults.tf_d_model}
140
+ inner_size: 768
141
+ num_attention_heads: 8
142
+ attn_score_dropout: 0.5
143
+ attn_layer_dropout: 0.5
144
+ ffn_dropout: 0.5
145
+ hidden_act: relu
146
+ pre_ln: false
147
+ pre_ln_final_layer_norm: true
148
+ loss:
149
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
150
+ weight: null
151
+ reduction: mean
152
+ lr: 2.0e-05
153
+ optim:
154
+ name: adamw
155
+ lr: ${model.lr}
156
+ betas:
157
+ - 0.9
158
+ - 0.98
159
+ weight_decay: 0.001
160
+ sched:
161
+ name: InverseSquareRootAnnealing
162
+ warmup_steps: 2500
163
+ warmup_ratio: null
164
+ min_lr: 1.0e-06
165
+ trainer:
166
+ devices: 8
167
+ accelerator: gpu
168
+ precision: bf16-mixed
169
+ max_epochs: -1
170
+ max_steps: 16000
171
+ num_nodes: 1
172
+ strategy: ddp_find_unused_parameters_true
173
+ accumulate_grad_batches: 1
174
+ deterministic: false
175
+ enable_checkpointing: false
176
+ logger: false
177
+ log_every_n_steps: 1
178
+ val_check_interval: 2000
179
+ num_sanity_val_steps: 0
180
+ exp_manager:
181
+ use_datetime_version: false
182
+ exp_dir: ./peav_sortformer_train
183
+ name: peav_sortformer_train
184
+ resume_if_exists: true
185
+ resume_from_checkpoint: null
186
+ resume_ignore_no_checkpoint: true
187
+ create_tensorboard_logger: true
188
+ create_checkpoint_callback: true
189
+ create_wandb_logger: false
190
+ checkpoint_callback_params:
191
+ monitor: val_der
192
+ mode: min
193
+ save_top_k: 5
194
+ every_n_train_steps: 2000
195
+ every_n_epochs: 0
196
+ wandb_logger_kwargs:
197
+ resume: true
198
+ name: null
199
+ project: null
200
+
201
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
202
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
203
+ [NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
204
+ [NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
205
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
206
+ [NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
207
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
208
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
209
+ [NeMo W 2026-04-13 11:11:45 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
210
+ [NeMo I 2026-04-13 11:11:45 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
211
+ [NeMo I 2026-04-13 11:12:06 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
212
+ [NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
213
+ Parameter Group 0
214
+ amsgrad: False
215
+ betas: (0.9, 0.98)
216
+ capturable: False
217
+ decoupled_weight_decay: True
218
+ differentiable: False
219
+ eps: 1e-08
220
+ foreach: None
221
+ fused: None
222
+ lr: 2e-05
223
+ maximize: False
224
+ weight_decay: 0.001
225
+ )
226
+ [NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7c7c4a659460>"
227
+ will be used during training (effective maximum steps = 16000) -
228
+ Parameters :
229
+ (warmup_steps: 2500
230
+ warmup_ratio: null
231
+ min_lr: 1.0e-06
232
+ max_steps: 16000
233
+ )
234
+ [NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118166.
235
+ [NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351925.
236
+ [NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395554.
237
+ [NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.733096.
238
+ [NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493816.
239
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072834.
240
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771283.
241
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136545.
242
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420424.
243
+ [NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374896.
244
+ [NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543217.
245
+ [NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291138.
246
+ [NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085386.
247
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822033.
248
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140968.
peav_sortformer_train/nemo_log_globalrank-5_localrank-5.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
2
+ num_workers: 1
3
+ batch_size: 24
4
+ model:
5
+ sample_rate: 48000
6
+ pil_weight: 0.5
7
+ ats_weight: 0.5
8
+ max_num_of_spks: 4
9
+ peav_checkpoint: pe-av-small-16-frame
10
+ peav_pretrained: true
11
+ peav_freeze_layers: 0
12
+ rttm_unit_10ms_frame_count: 4
13
+ der_collar: 0.25
14
+ der_ignore_overlap: true
15
+ model_defaults:
16
+ fc_d_model: 768
17
+ tf_d_model: 192
18
+ train_ds:
19
+ manifest_filepath: null
20
+ sample_rate: ${model.sample_rate}
21
+ num_spks: ${model.max_num_of_spks}
22
+ session_len_sec: 45
23
+ shift_sec: 4
24
+ soft_label_thres: 0.5
25
+ soft_targets: false
26
+ labels: null
27
+ batch_size: ${batch_size}
28
+ shuffle: true
29
+ num_workers: ${num_workers}
30
+ validation_mode: false
31
+ use_hf_streaming: true
32
+ hf_dataset_path: humanify/real_dia_dataset
33
+ hf_configs:
34
+ - CHiME6
35
+ - Dipco
36
+ - ICSI
37
+ - M3SD
38
+ - NOTSOFAR
39
+ - aishell4
40
+ - aishell5
41
+ - alimeeting
42
+ - ami_ihm
43
+ - ami_sdm
44
+ - callhome
45
+ - msdwild
46
+ - voxconverse
47
+ hf_split: train
48
+ shuffle_seed: 42
49
+ shuffle_buffer_size: 4
50
+ prefetch_factor: 4
51
+ persistent_workers: true
52
+ prefetch_rows: 8
53
+ use_lhotse: false
54
+ use_bucketing: true
55
+ num_buckets: 10
56
+ bucket_duration_bins:
57
+ - 10
58
+ - 20
59
+ - 30
60
+ - 40
61
+ - 50
62
+ - 60
63
+ - 70
64
+ - 80
65
+ - 90
66
+ pin_memory: true
67
+ min_duration: 10
68
+ max_duration: 90
69
+ batch_duration: 400
70
+ quadratic_duration: 1200
71
+ bucket_buffer_size: 20000
72
+ window_stride: 0.04
73
+ subsampling_factor: 1
74
+ validation_ds:
75
+ manifest_filepath: null
76
+ is_tarred: false
77
+ tarred_audio_filepaths: null
78
+ sample_rate: ${model.sample_rate}
79
+ num_spks: ${model.max_num_of_spks}
80
+ session_len_sec: 45
81
+ shift_sec: 4
82
+ soft_label_thres: 0.5
83
+ soft_targets: false
84
+ labels: null
85
+ batch_size: ${batch_size}
86
+ shuffle: false
87
+ num_workers: ${num_workers}
88
+ validation_mode: true
89
+ use_hf_streaming: true
90
+ hf_dataset_path: humanify/real_dia_dataset
91
+ hf_configs:
92
+ - alm_benchmark
93
+ hf_split: train
94
+ shuffle_seed: 42
95
+ shuffle_buffer_size: 100
96
+ prefetch_factor: 4
97
+ persistent_workers: true
98
+ prefetch_rows: 4
99
+ use_lhotse: false
100
+ use_bucketing: false
101
+ drop_last: false
102
+ pin_memory: true
103
+ window_stride: 0.04
104
+ subsampling_factor: 1
105
+ test_ds:
106
+ manifest_filepath: null
107
+ is_tarred: false
108
+ tarred_audio_filepaths: null
109
+ sample_rate: ${model.sample_rate}
110
+ num_spks: ${model.max_num_of_spks}
111
+ session_len_sec: 90
112
+ soft_label_thres: 0.5
113
+ soft_targets: false
114
+ labels: null
115
+ batch_size: ${batch_size}
116
+ shuffle: false
117
+ seq_eval_mode: true
118
+ num_workers: ${num_workers}
119
+ validation_mode: true
120
+ use_lhotse: false
121
+ use_bucketing: false
122
+ drop_last: false
123
+ pin_memory: true
124
+ window_stride: 0.04
125
+ subsampling_factor: 1
126
+ sortformer_modules:
127
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
128
+ num_spks: ${model.max_num_of_spks}
129
+ dropout_rate: 0.1
130
+ fc_d_model: ${model.model_defaults.fc_d_model}
131
+ tf_d_model: ${model.model_defaults.tf_d_model}
132
+ subsampling_factor: 1
133
+ encoder:
134
+ d_model: ${model.model_defaults.fc_d_model}
135
+ subsampling_factor: 1
136
+ transformer_encoder:
137
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
138
+ num_layers: 18
139
+ hidden_size: ${model.model_defaults.tf_d_model}
140
+ inner_size: 768
141
+ num_attention_heads: 8
142
+ attn_score_dropout: 0.5
143
+ attn_layer_dropout: 0.5
144
+ ffn_dropout: 0.5
145
+ hidden_act: relu
146
+ pre_ln: false
147
+ pre_ln_final_layer_norm: true
148
+ loss:
149
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
150
+ weight: null
151
+ reduction: mean
152
+ lr: 2.0e-05
153
+ optim:
154
+ name: adamw
155
+ lr: ${model.lr}
156
+ betas:
157
+ - 0.9
158
+ - 0.98
159
+ weight_decay: 0.001
160
+ sched:
161
+ name: InverseSquareRootAnnealing
162
+ warmup_steps: 2500
163
+ warmup_ratio: null
164
+ min_lr: 1.0e-06
165
+ trainer:
166
+ devices: 8
167
+ accelerator: gpu
168
+ precision: bf16-mixed
169
+ max_epochs: -1
170
+ max_steps: 16000
171
+ num_nodes: 1
172
+ strategy: ddp_find_unused_parameters_true
173
+ accumulate_grad_batches: 1
174
+ deterministic: false
175
+ enable_checkpointing: false
176
+ logger: false
177
+ log_every_n_steps: 1
178
+ val_check_interval: 2000
179
+ num_sanity_val_steps: 0
180
+ exp_manager:
181
+ use_datetime_version: false
182
+ exp_dir: ./peav_sortformer_train
183
+ name: peav_sortformer_train
184
+ resume_if_exists: true
185
+ resume_from_checkpoint: null
186
+ resume_ignore_no_checkpoint: true
187
+ create_tensorboard_logger: true
188
+ create_checkpoint_callback: true
189
+ create_wandb_logger: false
190
+ checkpoint_callback_params:
191
+ monitor: val_der
192
+ mode: min
193
+ save_top_k: 5
194
+ every_n_train_steps: 2000
195
+ every_n_epochs: 0
196
+ wandb_logger_kwargs:
197
+ resume: true
198
+ name: null
199
+ project: null
200
+
201
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
202
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
203
+ [NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
204
+ [NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
205
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
206
+ [NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
207
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
208
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
209
+ [NeMo W 2026-04-13 11:11:44 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
210
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
211
+ [NeMo I 2026-04-13 11:12:11 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
212
+ [NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
213
+ Parameter Group 0
214
+ amsgrad: False
215
+ betas: (0.9, 0.98)
216
+ capturable: False
217
+ decoupled_weight_decay: True
218
+ differentiable: False
219
+ eps: 1e-08
220
+ foreach: None
221
+ fused: None
222
+ lr: 2e-05
223
+ maximize: False
224
+ weight_decay: 0.001
225
+ )
226
+ [NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x703b76086360>"
227
+ will be used during training (effective maximum steps = 16000) -
228
+ Parameters :
229
+ (warmup_steps: 2500
230
+ warmup_ratio: null
231
+ min_lr: 1.0e-06
232
+ max_steps: 16000
233
+ )
234
+ [NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118183.
235
+ [NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351942.
236
+ [NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.239556.
237
+ [NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7330983.
238
+ [NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.449381.
239
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072872.
240
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771423.
241
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136571.
242
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420366.
243
+ [NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.637491.
244
+ [NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543243.
245
+ [NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291174.
246
+ [NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085383.
247
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822045.
248
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140959.
peav_sortformer_train/nemo_log_globalrank-6_localrank-6.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
2
+ num_workers: 1
3
+ batch_size: 24
4
+ model:
5
+ sample_rate: 48000
6
+ pil_weight: 0.5
7
+ ats_weight: 0.5
8
+ max_num_of_spks: 4
9
+ peav_checkpoint: pe-av-small-16-frame
10
+ peav_pretrained: true
11
+ peav_freeze_layers: 0
12
+ rttm_unit_10ms_frame_count: 4
13
+ der_collar: 0.25
14
+ der_ignore_overlap: true
15
+ model_defaults:
16
+ fc_d_model: 768
17
+ tf_d_model: 192
18
+ train_ds:
19
+ manifest_filepath: null
20
+ sample_rate: ${model.sample_rate}
21
+ num_spks: ${model.max_num_of_spks}
22
+ session_len_sec: 45
23
+ shift_sec: 4
24
+ soft_label_thres: 0.5
25
+ soft_targets: false
26
+ labels: null
27
+ batch_size: ${batch_size}
28
+ shuffle: true
29
+ num_workers: ${num_workers}
30
+ validation_mode: false
31
+ use_hf_streaming: true
32
+ hf_dataset_path: humanify/real_dia_dataset
33
+ hf_configs:
34
+ - CHiME6
35
+ - Dipco
36
+ - ICSI
37
+ - M3SD
38
+ - NOTSOFAR
39
+ - aishell4
40
+ - aishell5
41
+ - alimeeting
42
+ - ami_ihm
43
+ - ami_sdm
44
+ - callhome
45
+ - msdwild
46
+ - voxconverse
47
+ hf_split: train
48
+ shuffle_seed: 42
49
+ shuffle_buffer_size: 4
50
+ prefetch_factor: 4
51
+ persistent_workers: true
52
+ prefetch_rows: 8
53
+ use_lhotse: false
54
+ use_bucketing: true
55
+ num_buckets: 10
56
+ bucket_duration_bins:
57
+ - 10
58
+ - 20
59
+ - 30
60
+ - 40
61
+ - 50
62
+ - 60
63
+ - 70
64
+ - 80
65
+ - 90
66
+ pin_memory: true
67
+ min_duration: 10
68
+ max_duration: 90
69
+ batch_duration: 400
70
+ quadratic_duration: 1200
71
+ bucket_buffer_size: 20000
72
+ window_stride: 0.04
73
+ subsampling_factor: 1
74
+ validation_ds:
75
+ manifest_filepath: null
76
+ is_tarred: false
77
+ tarred_audio_filepaths: null
78
+ sample_rate: ${model.sample_rate}
79
+ num_spks: ${model.max_num_of_spks}
80
+ session_len_sec: 45
81
+ shift_sec: 4
82
+ soft_label_thres: 0.5
83
+ soft_targets: false
84
+ labels: null
85
+ batch_size: ${batch_size}
86
+ shuffle: false
87
+ num_workers: ${num_workers}
88
+ validation_mode: true
89
+ use_hf_streaming: true
90
+ hf_dataset_path: humanify/real_dia_dataset
91
+ hf_configs:
92
+ - alm_benchmark
93
+ hf_split: train
94
+ shuffle_seed: 42
95
+ shuffle_buffer_size: 100
96
+ prefetch_factor: 4
97
+ persistent_workers: true
98
+ prefetch_rows: 4
99
+ use_lhotse: false
100
+ use_bucketing: false
101
+ drop_last: false
102
+ pin_memory: true
103
+ window_stride: 0.04
104
+ subsampling_factor: 1
105
+ test_ds:
106
+ manifest_filepath: null
107
+ is_tarred: false
108
+ tarred_audio_filepaths: null
109
+ sample_rate: ${model.sample_rate}
110
+ num_spks: ${model.max_num_of_spks}
111
+ session_len_sec: 90
112
+ soft_label_thres: 0.5
113
+ soft_targets: false
114
+ labels: null
115
+ batch_size: ${batch_size}
116
+ shuffle: false
117
+ seq_eval_mode: true
118
+ num_workers: ${num_workers}
119
+ validation_mode: true
120
+ use_lhotse: false
121
+ use_bucketing: false
122
+ drop_last: false
123
+ pin_memory: true
124
+ window_stride: 0.04
125
+ subsampling_factor: 1
126
+ sortformer_modules:
127
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
128
+ num_spks: ${model.max_num_of_spks}
129
+ dropout_rate: 0.1
130
+ fc_d_model: ${model.model_defaults.fc_d_model}
131
+ tf_d_model: ${model.model_defaults.tf_d_model}
132
+ subsampling_factor: 1
133
+ encoder:
134
+ d_model: ${model.model_defaults.fc_d_model}
135
+ subsampling_factor: 1
136
+ transformer_encoder:
137
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
138
+ num_layers: 18
139
+ hidden_size: ${model.model_defaults.tf_d_model}
140
+ inner_size: 768
141
+ num_attention_heads: 8
142
+ attn_score_dropout: 0.5
143
+ attn_layer_dropout: 0.5
144
+ ffn_dropout: 0.5
145
+ hidden_act: relu
146
+ pre_ln: false
147
+ pre_ln_final_layer_norm: true
148
+ loss:
149
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
150
+ weight: null
151
+ reduction: mean
152
+ lr: 2.0e-05
153
+ optim:
154
+ name: adamw
155
+ lr: ${model.lr}
156
+ betas:
157
+ - 0.9
158
+ - 0.98
159
+ weight_decay: 0.001
160
+ sched:
161
+ name: InverseSquareRootAnnealing
162
+ warmup_steps: 2500
163
+ warmup_ratio: null
164
+ min_lr: 1.0e-06
165
+ trainer:
166
+ devices: 8
167
+ accelerator: gpu
168
+ precision: bf16-mixed
169
+ max_epochs: -1
170
+ max_steps: 16000
171
+ num_nodes: 1
172
+ strategy: ddp_find_unused_parameters_true
173
+ accumulate_grad_batches: 1
174
+ deterministic: false
175
+ enable_checkpointing: false
176
+ logger: false
177
+ log_every_n_steps: 1
178
+ val_check_interval: 2000
179
+ num_sanity_val_steps: 0
180
+ exp_manager:
181
+ use_datetime_version: false
182
+ exp_dir: ./peav_sortformer_train
183
+ name: peav_sortformer_train
184
+ resume_if_exists: true
185
+ resume_from_checkpoint: null
186
+ resume_ignore_no_checkpoint: true
187
+ create_tensorboard_logger: true
188
+ create_checkpoint_callback: true
189
+ create_wandb_logger: false
190
+ checkpoint_callback_params:
191
+ monitor: val_der
192
+ mode: min
193
+ save_top_k: 5
194
+ every_n_train_steps: 2000
195
+ every_n_epochs: 0
196
+ wandb_logger_kwargs:
197
+ resume: true
198
+ name: null
199
+ project: null
200
+
201
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
202
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
203
+ [NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
204
+ [NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
205
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
206
+ [NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
207
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
208
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
209
+ [NeMo W 2026-04-13 11:11:44 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
210
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
211
+ [NeMo I 2026-04-13 11:12:10 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
212
+ [NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
213
+ Parameter Group 0
214
+ amsgrad: False
215
+ betas: (0.9, 0.98)
216
+ capturable: False
217
+ decoupled_weight_decay: True
218
+ differentiable: False
219
+ eps: 1e-08
220
+ foreach: None
221
+ fused: None
222
+ lr: 2e-05
223
+ maximize: False
224
+ weight_decay: 0.001
225
+ )
226
+ [NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x72cf44392b40>"
227
+ will be used during training (effective maximum steps = 16000) -
228
+ Parameters :
229
+ (warmup_steps: 2500
230
+ warmup_ratio: null
231
+ min_lr: 1.0e-06
232
+ max_steps: 16000
233
+ )
234
+ [NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.111822.
235
+ [NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351958.
236
+ [NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395577.
237
+ [NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7330983.
238
+ [NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493845.
239
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.307287.
240
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771316.
241
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136576.
242
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420383.
243
+ [NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374958.
244
+ [NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543286.
245
+ [NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291193.
246
+ [NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.408541.
247
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822076.
248
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140973.
peav_sortformer_train/nemo_log_globalrank-7_localrank-7.txt ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
2
+ num_workers: 1
3
+ batch_size: 24
4
+ model:
5
+ sample_rate: 48000
6
+ pil_weight: 0.5
7
+ ats_weight: 0.5
8
+ max_num_of_spks: 4
9
+ peav_checkpoint: pe-av-small-16-frame
10
+ peav_pretrained: true
11
+ peav_freeze_layers: 0
12
+ rttm_unit_10ms_frame_count: 4
13
+ der_collar: 0.25
14
+ der_ignore_overlap: true
15
+ model_defaults:
16
+ fc_d_model: 768
17
+ tf_d_model: 192
18
+ train_ds:
19
+ manifest_filepath: null
20
+ sample_rate: ${model.sample_rate}
21
+ num_spks: ${model.max_num_of_spks}
22
+ session_len_sec: 45
23
+ shift_sec: 4
24
+ soft_label_thres: 0.5
25
+ soft_targets: false
26
+ labels: null
27
+ batch_size: ${batch_size}
28
+ shuffle: true
29
+ num_workers: ${num_workers}
30
+ validation_mode: false
31
+ use_hf_streaming: true
32
+ hf_dataset_path: humanify/real_dia_dataset
33
+ hf_configs:
34
+ - CHiME6
35
+ - Dipco
36
+ - ICSI
37
+ - M3SD
38
+ - NOTSOFAR
39
+ - aishell4
40
+ - aishell5
41
+ - alimeeting
42
+ - ami_ihm
43
+ - ami_sdm
44
+ - callhome
45
+ - msdwild
46
+ - voxconverse
47
+ hf_split: train
48
+ shuffle_seed: 42
49
+ shuffle_buffer_size: 4
50
+ prefetch_factor: 4
51
+ persistent_workers: true
52
+ prefetch_rows: 8
53
+ use_lhotse: false
54
+ use_bucketing: true
55
+ num_buckets: 10
56
+ bucket_duration_bins:
57
+ - 10
58
+ - 20
59
+ - 30
60
+ - 40
61
+ - 50
62
+ - 60
63
+ - 70
64
+ - 80
65
+ - 90
66
+ pin_memory: true
67
+ min_duration: 10
68
+ max_duration: 90
69
+ batch_duration: 400
70
+ quadratic_duration: 1200
71
+ bucket_buffer_size: 20000
72
+ window_stride: 0.04
73
+ subsampling_factor: 1
74
+ validation_ds:
75
+ manifest_filepath: null
76
+ is_tarred: false
77
+ tarred_audio_filepaths: null
78
+ sample_rate: ${model.sample_rate}
79
+ num_spks: ${model.max_num_of_spks}
80
+ session_len_sec: 45
81
+ shift_sec: 4
82
+ soft_label_thres: 0.5
83
+ soft_targets: false
84
+ labels: null
85
+ batch_size: ${batch_size}
86
+ shuffle: false
87
+ num_workers: ${num_workers}
88
+ validation_mode: true
89
+ use_hf_streaming: true
90
+ hf_dataset_path: humanify/real_dia_dataset
91
+ hf_configs:
92
+ - alm_benchmark
93
+ hf_split: train
94
+ shuffle_seed: 42
95
+ shuffle_buffer_size: 100
96
+ prefetch_factor: 4
97
+ persistent_workers: true
98
+ prefetch_rows: 4
99
+ use_lhotse: false
100
+ use_bucketing: false
101
+ drop_last: false
102
+ pin_memory: true
103
+ window_stride: 0.04
104
+ subsampling_factor: 1
105
+ test_ds:
106
+ manifest_filepath: null
107
+ is_tarred: false
108
+ tarred_audio_filepaths: null
109
+ sample_rate: ${model.sample_rate}
110
+ num_spks: ${model.max_num_of_spks}
111
+ session_len_sec: 90
112
+ soft_label_thres: 0.5
113
+ soft_targets: false
114
+ labels: null
115
+ batch_size: ${batch_size}
116
+ shuffle: false
117
+ seq_eval_mode: true
118
+ num_workers: ${num_workers}
119
+ validation_mode: true
120
+ use_lhotse: false
121
+ use_bucketing: false
122
+ drop_last: false
123
+ pin_memory: true
124
+ window_stride: 0.04
125
+ subsampling_factor: 1
126
+ sortformer_modules:
127
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
128
+ num_spks: ${model.max_num_of_spks}
129
+ dropout_rate: 0.1
130
+ fc_d_model: ${model.model_defaults.fc_d_model}
131
+ tf_d_model: ${model.model_defaults.tf_d_model}
132
+ subsampling_factor: 1
133
+ encoder:
134
+ d_model: ${model.model_defaults.fc_d_model}
135
+ subsampling_factor: 1
136
+ transformer_encoder:
137
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
138
+ num_layers: 18
139
+ hidden_size: ${model.model_defaults.tf_d_model}
140
+ inner_size: 768
141
+ num_attention_heads: 8
142
+ attn_score_dropout: 0.5
143
+ attn_layer_dropout: 0.5
144
+ ffn_dropout: 0.5
145
+ hidden_act: relu
146
+ pre_ln: false
147
+ pre_ln_final_layer_norm: true
148
+ loss:
149
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
150
+ weight: null
151
+ reduction: mean
152
+ lr: 2.0e-05
153
+ optim:
154
+ name: adamw
155
+ lr: ${model.lr}
156
+ betas:
157
+ - 0.9
158
+ - 0.98
159
+ weight_decay: 0.001
160
+ sched:
161
+ name: InverseSquareRootAnnealing
162
+ warmup_steps: 2500
163
+ warmup_ratio: null
164
+ min_lr: 1.0e-06
165
+ trainer:
166
+ devices: 8
167
+ accelerator: gpu
168
+ precision: bf16-mixed
169
+ max_epochs: -1
170
+ max_steps: 16000
171
+ num_nodes: 1
172
+ strategy: ddp_find_unused_parameters_true
173
+ accumulate_grad_batches: 1
174
+ deterministic: false
175
+ enable_checkpointing: false
176
+ logger: false
177
+ log_every_n_steps: 1
178
+ val_check_interval: 2000
179
+ num_sanity_val_steps: 0
180
+ exp_manager:
181
+ use_datetime_version: false
182
+ exp_dir: ./peav_sortformer_train
183
+ name: peav_sortformer_train
184
+ resume_if_exists: true
185
+ resume_from_checkpoint: null
186
+ resume_ignore_no_checkpoint: true
187
+ create_tensorboard_logger: true
188
+ create_checkpoint_callback: true
189
+ create_wandb_logger: false
190
+ checkpoint_callback_params:
191
+ monitor: val_der
192
+ mode: min
193
+ save_top_k: 5
194
+ every_n_train_steps: 2000
195
+ every_n_epochs: 0
196
+ wandb_logger_kwargs:
197
+ resume: true
198
+ name: null
199
+ project: null
200
+
201
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
202
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
203
+ [NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
204
+ [NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
205
+ [NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
206
+ [NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
207
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
208
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
209
+ [NeMo W 2026-04-13 11:11:44 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
210
+ [NeMo I 2026-04-13 11:11:44 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
211
+ [NeMo I 2026-04-13 11:12:08 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
212
+ [NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
213
+ Parameter Group 0
214
+ amsgrad: False
215
+ betas: (0.9, 0.98)
216
+ capturable: False
217
+ decoupled_weight_decay: True
218
+ differentiable: False
219
+ eps: 1e-08
220
+ foreach: None
221
+ fused: None
222
+ lr: 2e-05
223
+ maximize: False
224
+ weight_decay: 0.001
225
+ )
226
+ [NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x78cb16b8e900>"
227
+ will be used during training (effective maximum steps = 16000) -
228
+ Parameters :
229
+ (warmup_steps: 2500
230
+ warmup_ratio: null
231
+ min_lr: 1.0e-06
232
+ max_steps: 16000
233
+ )
234
+ [NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118307.
235
+ [NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351996.
236
+ [NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395585.
237
+ [NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7331011.
238
+ [NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.449385.
239
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072908.
240
+ [NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771328.
241
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136598.
242
+ [NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.942039.
243
+ [NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374946.
244
+ [NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543284.
245
+ [NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291188.
246
+ [NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085422.
247
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822095.
248
+ [NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9141161.
peav_sortformer_train/version_0/events.out.tfevents.1776078731.260d0e88e8f5.93828.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00b94e3a28aa5883807f2d77d3b01f5e75bf50b51dac7fb16aebfa15e046686b
3
+ size 9138653
peav_sortformer_train/version_0/hparams.yaml ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cfg:
2
+ sample_rate: 48000
3
+ pil_weight: 0.5
4
+ ats_weight: 0.5
5
+ max_num_of_spks: 4
6
+ peav_checkpoint: pe-av-small-16-frame
7
+ peav_pretrained: true
8
+ peav_freeze_layers: 0
9
+ rttm_unit_10ms_frame_count: 4
10
+ der_collar: 0.25
11
+ der_ignore_overlap: true
12
+ model_defaults:
13
+ fc_d_model: 768
14
+ tf_d_model: 192
15
+ train_ds:
16
+ manifest_filepath: null
17
+ sample_rate: 48000
18
+ num_spks: 4
19
+ session_len_sec: 45
20
+ shift_sec: 4
21
+ soft_label_thres: 0.5
22
+ soft_targets: false
23
+ labels: null
24
+ batch_size: 24
25
+ shuffle: true
26
+ num_workers: 1
27
+ validation_mode: false
28
+ use_hf_streaming: true
29
+ hf_dataset_path: humanify/real_dia_dataset
30
+ hf_configs:
31
+ - CHiME6
32
+ - Dipco
33
+ - ICSI
34
+ - M3SD
35
+ - NOTSOFAR
36
+ - aishell4
37
+ - aishell5
38
+ - alimeeting
39
+ - ami_ihm
40
+ - ami_sdm
41
+ - callhome
42
+ - msdwild
43
+ - voxconverse
44
+ hf_split: train
45
+ shuffle_seed: 42
46
+ shuffle_buffer_size: 4
47
+ prefetch_factor: 4
48
+ persistent_workers: true
49
+ prefetch_rows: 8
50
+ use_lhotse: false
51
+ use_bucketing: true
52
+ num_buckets: 10
53
+ bucket_duration_bins:
54
+ - 10
55
+ - 20
56
+ - 30
57
+ - 40
58
+ - 50
59
+ - 60
60
+ - 70
61
+ - 80
62
+ - 90
63
+ pin_memory: true
64
+ min_duration: 10
65
+ max_duration: 90
66
+ batch_duration: 400
67
+ quadratic_duration: 1200
68
+ bucket_buffer_size: 20000
69
+ window_stride: 0.04
70
+ subsampling_factor: 1
71
+ validation_ds:
72
+ manifest_filepath: null
73
+ is_tarred: false
74
+ tarred_audio_filepaths: null
75
+ sample_rate: 48000
76
+ num_spks: 4
77
+ session_len_sec: 45
78
+ shift_sec: 4
79
+ soft_label_thres: 0.5
80
+ soft_targets: false
81
+ labels: null
82
+ batch_size: 24
83
+ shuffle: false
84
+ num_workers: 1
85
+ validation_mode: true
86
+ use_hf_streaming: true
87
+ hf_dataset_path: humanify/real_dia_dataset
88
+ hf_configs:
89
+ - alm_benchmark
90
+ hf_split: train
91
+ shuffle_seed: 42
92
+ shuffle_buffer_size: 100
93
+ prefetch_factor: 4
94
+ persistent_workers: true
95
+ prefetch_rows: 4
96
+ use_lhotse: false
97
+ use_bucketing: false
98
+ drop_last: false
99
+ pin_memory: true
100
+ window_stride: 0.04
101
+ subsampling_factor: 1
102
+ test_ds:
103
+ manifest_filepath: null
104
+ is_tarred: false
105
+ tarred_audio_filepaths: null
106
+ sample_rate: 48000
107
+ num_spks: 4
108
+ session_len_sec: 90
109
+ soft_label_thres: 0.5
110
+ soft_targets: false
111
+ labels: null
112
+ batch_size: 24
113
+ shuffle: false
114
+ seq_eval_mode: true
115
+ num_workers: 1
116
+ validation_mode: true
117
+ use_lhotse: false
118
+ use_bucketing: false
119
+ drop_last: false
120
+ pin_memory: true
121
+ window_stride: 0.04
122
+ subsampling_factor: 1
123
+ sortformer_modules:
124
+ _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
125
+ num_spks: 4
126
+ dropout_rate: 0.1
127
+ fc_d_model: 768
128
+ tf_d_model: 192
129
+ subsampling_factor: 1
130
+ encoder:
131
+ d_model: 768
132
+ subsampling_factor: 1
133
+ transformer_encoder:
134
+ _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
135
+ num_layers: 18
136
+ hidden_size: 192
137
+ inner_size: 768
138
+ num_attention_heads: 8
139
+ attn_score_dropout: 0.5
140
+ attn_layer_dropout: 0.5
141
+ ffn_dropout: 0.5
142
+ hidden_act: relu
143
+ pre_ln: false
144
+ pre_ln_final_layer_norm: true
145
+ loss:
146
+ _target_: nemo.collections.asr.losses.bce_loss.BCELoss
147
+ weight: null
148
+ reduction: mean
149
+ lr: 2.0e-05
150
+ optim:
151
+ name: adamw
152
+ lr: 2.0e-05
153
+ betas:
154
+ - 0.9
155
+ - 0.98
156
+ weight_decay: 0.001
157
+ sched:
158
+ name: InverseSquareRootAnnealing
159
+ warmup_steps: 2500
160
+ warmup_ratio: null
161
+ min_lr: 1.0e-06