tsw0411 committed
Commit d699769 · verified · 1 Parent(s): b98c240

Upload folder using huggingface_hub

Files changed (20):
  1. openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5475-epoch=0-last.ckpt +3 -0
  2. openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5475-epoch=0.ckpt +3 -0
  3. openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5632-epoch=0.ckpt +3 -0
  4. openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5934-epoch=0.ckpt +3 -0
  5. openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.9860-epoch=0.ckpt +3 -0
  6. openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.9965-epoch=0.ckpt +3 -0
  7. openflam_sortformer_train/cmd-args.log +1 -0
  8. openflam_sortformer_train/git-info.log +13 -0
  9. openflam_sortformer_train/lightning_logs.txt +26 -0
  10. openflam_sortformer_train/nemo_error_log.txt +8 -0
  11. openflam_sortformer_train/nemo_log_globalrank-0_localrank-0.txt +251 -0
  12. openflam_sortformer_train/nemo_log_globalrank-1_localrank-1.txt +245 -0
  13. openflam_sortformer_train/nemo_log_globalrank-2_localrank-2.txt +245 -0
  14. openflam_sortformer_train/nemo_log_globalrank-3_localrank-3.txt +245 -0
  15. openflam_sortformer_train/nemo_log_globalrank-4_localrank-4.txt +245 -0
  16. openflam_sortformer_train/nemo_log_globalrank-5_localrank-5.txt +245 -0
  17. openflam_sortformer_train/nemo_log_globalrank-6_localrank-6.txt +245 -0
  18. openflam_sortformer_train/nemo_log_globalrank-7_localrank-7.txt +245 -0
  19. openflam_sortformer_train/version_0/events.out.tfevents.1776106183.aa77be2546cc.6641.0 +3 -0
  20. openflam_sortformer_train/version_0/hparams.yaml +162 -0
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5475-epoch=0-last.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:983df478cabe165ab622a9ee0d94cd07981c1a4bedf8abbafe3903554d9a8d9b
+ size 920132478
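
Editor's note: the six .ckpt entries in this commit are Git LFS pointer files like the one above, not the checkpoint binaries themselves; the ~920 MB payloads are fetched by their sha256 oid at checkout. A minimal sketch of reading such a pointer, assuming only the standard three-field layout shown above (the helper name is illustrative):

```python
# Minimal sketch: parse a Git LFS pointer file into its key/value fields.
# Assumes the standard three-line layout (version / oid / size) shown above.
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    for line in Path(path).read_text().splitlines():
        if line.strip():
            key, _, value = line.partition(" ")
            fields[key] = value
    return fields

ptr = parse_lfs_pointer(
    "openflam_sortformer_train/checkpoints/"
    "openflam_sortformer_train--val_der=0.5475-epoch=0-last.ckpt"
)
print(ptr["oid"], int(ptr["size"]))  # sha256:983df4... 920132478
```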
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5475-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ee2ac83b80a349a0950950419f2084f3529d1bb086c4a32ddd524f36d360eede
+ size 920132478
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5632-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0c048f1c92fe4593462fcd82c7dd958147b92f1274db9306de6dbcc43cd18c46
+ size 920132286
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5934-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b63d1d860ac8dd26bea4f25705e02425c83cbf1808801a76819b30c354f8beed
+ size 920131839
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.9860-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c19d6b2d2b344445596915b774cfed53da4ae89f78df0c3c77b1f74e42e34c62
+ size 920131456
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.9965-epoch=0.ckpt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fb14be6ffe43bc10ba930914e66471d825201cb53e72fbbd4f3d8719d1083e05
+ size 920131009
openflam_sortformer_train/cmd-args.log ADDED
@@ -0,0 +1 @@
+ scripts/openflam_sortformer_train.py --config-path=../configs --config-name=openflam_sortformer_4spk.yaml exp_manager.name=openflam_sortformer_train exp_manager.exp_dir=./openflam_sortformer_train
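
Editor's note: the logged command combines Hydra's --config-path/--config-name flags with dotted overrides (exp_manager.name=..., exp_manager.exp_dir=...) that get merged into the config tree before training starts. A minimal sketch of an entry point wired this way, assuming standard Hydra usage; this is illustrative, not the repository's actual scripts/openflam_sortformer_train.py:

```python
# Illustrative sketch only (not the repository's actual script): how a
# Hydra entry point consumes --config-path/--config-name plus dotted
# overrides such as exp_manager.name=... from the command above.
import hydra
from omegaconf import DictConfig, OmegaConf

@hydra.main(version_base=None, config_path="../configs",
            config_name="openflam_sortformer_4spk")
def main(cfg: DictConfig) -> None:
    # CLI overrides (exp_manager.name, exp_manager.exp_dir) are already
    # merged into cfg by the time this function runs.
    print(OmegaConf.to_yaml(cfg))

if __name__ == "__main__":
    main()
```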
openflam_sortformer_train/git-info.log ADDED
@@ -0,0 +1,13 @@
+ commit hash: 5ecc0a3c86af359b2f459bbb39f69a6fefc4bc40
+ diff --git a/.gitignore b/.gitignore
+ index 982a755..0707177 100644
+ --- a/.gitignore
+ +++ b/.gitignore
+ @@ -10,6 +10,7 @@ __pycache__/
+ peav_sortformer_train/
+ dasheng_sortformer_train/
+ peav_sortformer_v2_train
+ +openflam_sortformer_train/
+
+ debug_hf_training_window/
+
openflam_sortformer_train/lightning_logs.txt ADDED
@@ -0,0 +1,26 @@
+ LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
+
+ | Name | Type | Params | Mode
+ --------------------------------------------------------------------------------
+ 0 | htsat_spectrogram_extractor | Spectrogram | 1.1 M | train
+ 1 | htsat_logmel_extractor | LogmelFilterBank | 32.8 K | train
+ 2 | htsat_bn0 | BatchNorm2d | 128 | eval
+ 3 | htsat_patch_embed | PatchEmbed | 2.4 K | train
+ 4 | htsat_pos_drop | Dropout | 0 | train
+ 5 | encoder | OpenFLAMEncoderWrapper | 67.8 M | train
+ 6 | sortformer_modules | SortformerModules | 236 K | train
+ 7 | transformer_encoder | TransformerEncoder | 8.0 M | train
+ 8 | loss | BCELoss | 0 | train
+ 9 | _accuracy_test | MultiBinaryAccuracy | 0 | train
+ 10 | _accuracy_train | MultiBinaryAccuracy | 0 | train
+ 11 | _accuracy_valid | MultiBinaryAccuracy | 0 | train
+ 12 | _accuracy_test_ats | MultiBinaryAccuracy | 0 | train
+ 13 | _accuracy_train_ats | MultiBinaryAccuracy | 0 | train
+ 14 | _accuracy_valid_ats | MultiBinaryAccuracy | 0 | train
+ --------------------------------------------------------------------------------
+ 76.1 M Trainable params
+ 1.1 M Non-trainable params
+ 77.2 M Total params
+ 308.606 Total estimated model params size (MB)
+ 567 Modules in train mode
+ 1 Modules in eval mode
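
Editor's note: the summary's 308.606 MB figure is consistent with Lightning's convention of 4 bytes (fp32) per parameter; a quick check, assuming a total of about 77.15 M parameters (the table rounds this to 77.2 M):

```python
# Quick consistency check on the summary above: Lightning estimates model
# size as parameter count x 4 bytes (fp32), reported in MB.
total_params = 77_151_500                  # assumed exact count; table rounds to 77.2 M
print(f"{total_params * 4 / 1e6:.3f} MB")  # -> 308.606 MB
```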
openflam_sortformer_train/nemo_error_log.txt ADDED
@@ -0,0 +1,8 @@
+ [NeMo W 2026-04-13 18:48:50 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:48:50 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo W 2026-04-13 18:48:50 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo W 2026-04-13 18:48:50 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo W 2026-04-13 18:48:50 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
+
+ [NeMo W 2026-04-13 18:50:30 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
+
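
Editor's note: both num_workers warnings trace back to `num_workers: 1` in the logged config; the conventional fix is to give each DataLoader more worker processes, here the suggested num_workers=7 (in this run the equivalent change is the top-level `num_workers` key of the Hydra config). A minimal PyTorch sketch of the setting the warning asks for; the dataset is a stand-in:

```python
# Illustrative response to the Lightning warning above: give the
# DataLoader more worker processes (here the suggested num_workers=7).
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.zeros(8, 1))  # stand-in dataset
loader = DataLoader(
    dataset,
    batch_size=24,            # matches batch_size in the logged config
    num_workers=7,            # value suggested by the warning
    pin_memory=True,          # as in the logged config
    persistent_workers=True,  # requires num_workers > 0
)
```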
openflam_sortformer_train/nemo_log_globalrank-0_localrank-0.txt ADDED
@@ -0,0 +1,251 @@
+ [NeMo I 2026-04-13 18:48:50 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
+ num_workers: 1
+ batch_size: 24
+ model:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: true
+     num_workers: ${num_workers}
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: ${model.max_num_of_spks}
+     dropout_rate: 0.1
+     fc_d_model: ${model.model_defaults.fc_d_model}
+     tf_d_model: ${model.model_defaults.tf_d_model}
+     subsampling_factor: 1
+   encoder:
+     d_model: ${model.model_defaults.fc_d_model}
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: ${model.model_defaults.tf_d_model}
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: ${model.lr}
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06
+ trainer:
+   devices: 8
+   accelerator: gpu
+   precision: bf16-mixed
+   max_epochs: -1
+   max_steps: 16000
+   num_nodes: 1
+   strategy: ddp_find_unused_parameters_true
+   accumulate_grad_batches: 1
+   deterministic: false
+   enable_checkpointing: false
+   logger: false
+   log_every_n_steps: 1
+   val_check_interval: 2000
+   num_sanity_val_steps: 0
+ exp_manager:
+   use_datetime_version: false
+   exp_dir: ./openflam_sortformer_train
+   name: openflam_sortformer_train
+   resume_if_exists: true
+   resume_from_checkpoint: null
+   resume_ignore_no_checkpoint: true
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   create_wandb_logger: false
+   checkpoint_callback_params:
+     monitor: val_der
+     mode: min
+     save_top_k: 5
+     every_n_train_steps: 2000
+     every_n_epochs: 0
+   wandb_logger_kwargs:
+     resume: true
+     name: null
+     project: null
+
+ [NeMo I 2026-04-13 18:48:50 exp_manager:594] ExpManager schema
+ [NeMo I 2026-04-13 18:48:50 exp_manager:595] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'run_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1}, 'create_early_stopping_callback': False, 'create_ipl_epoch_stopper_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'ipl_epoch_stopper_callback_params': {'enable_stop': True, 'stop_every_n_epochs': 1}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
+ [NeMo W 2026-04-13 18:48:50 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:48:50 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo I 2026-04-13 18:48:50 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
+ [NeMo I 2026-04-13 18:48:50 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo W 2026-04-13 18:48:50 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-13 18:48:50 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-13 18:48:50 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo I 2026-04-13 18:48:50 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo W 2026-04-13 18:48:50 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo I 2026-04-13 18:48:50 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
+ [NeMo I 2026-04-13 18:49:11 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
+ [NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
+ Parameter Group 0
+     amsgrad: False
+     betas: (0.9, 0.98)
+     capturable: False
+     decoupled_weight_decay: True
+     differentiable: False
+     eps: 1e-08
+     foreach: None
+     fused: None
+     lr: 2e-05
+     maximize: False
+     weight_decay: 0.001
+ )
+ [NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7fd2ee2d5940>"
+ will be used during training (effective maximum steps = 16000) -
+ Parameters :
+ (warmup_steps: 2500
+ warmup_ratio: null
+ min_lr: 1.0e-06
+ max_steps: 16000
+ )
+ [NeMo W 2026-04-13 18:49:43 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
+
+ [NeMo W 2026-04-13 18:50:30 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
+
+ [NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453387.
+ [NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900334.
+ [NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641889.
+ [NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533197.
+ [NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908803.
+ [NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0131054.
+ [NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344173.
+ [NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732732.
+ [NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.742586.
+ [NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947825.
+ [NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5716007.
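
Editor's note: the logged config relies on OmegaConf interpolations such as ${model.sample_rate} and ${batch_size}, which resolve against the root of the config tree when a value is read. A minimal sketch with a trimmed-down copy of the config:

```python
# Minimal sketch of how the ${...} interpolations in the logged Hydra
# config resolve: OmegaConf looks the dotted path up from the config root.
from omegaconf import OmegaConf

yaml_src = """
batch_size: 24
model:
  sample_rate: 48000
  train_ds:
    sample_rate: ${model.sample_rate}
    batch_size: ${batch_size}
"""
cfg = OmegaConf.create(yaml_src)
assert cfg.model.train_ds.sample_rate == 48000  # resolved on access
assert cfg.model.train_ds.batch_size == 24
```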
openflam_sortformer_train/nemo_log_globalrank-1_localrank-1.txt ADDED
@@ -0,0 +1,245 @@
+ [NeMo I 2026-04-13 18:49:23 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
+ num_workers: 1
+ batch_size: 24
+ model:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: true
+     num_workers: ${num_workers}
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: ${model.max_num_of_spks}
+     dropout_rate: 0.1
+     fc_d_model: ${model.model_defaults.fc_d_model}
+     tf_d_model: ${model.model_defaults.tf_d_model}
+     subsampling_factor: 1
+   encoder:
+     d_model: ${model.model_defaults.fc_d_model}
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: ${model.model_defaults.tf_d_model}
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: ${model.lr}
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06
+ trainer:
+   devices: 8
+   accelerator: gpu
+   precision: bf16-mixed
+   max_epochs: -1
+   max_steps: 16000
+   num_nodes: 1
+   strategy: ddp_find_unused_parameters_true
+   accumulate_grad_batches: 1
+   deterministic: false
+   enable_checkpointing: false
+   logger: false
+   log_every_n_steps: 1
+   val_check_interval: 2000
+   num_sanity_val_steps: 0
+ exp_manager:
+   use_datetime_version: false
+   exp_dir: ./openflam_sortformer_train
+   name: openflam_sortformer_train
+   resume_if_exists: true
+   resume_from_checkpoint: null
+   resume_ignore_no_checkpoint: true
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   create_wandb_logger: false
+   checkpoint_callback_params:
+     monitor: val_der
+     mode: min
+     save_top_k: 5
+     every_n_train_steps: 2000
+     every_n_epochs: 0
+   wandb_logger_kwargs:
+     resume: true
+     name: null
+     project: null
+
+ [NeMo W 2026-04-13 18:49:23 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:49:23 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo I 2026-04-13 18:49:23 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
+ [NeMo I 2026-04-13 18:49:23 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo W 2026-04-13 18:49:23 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-13 18:49:23 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo W 2026-04-13 18:49:28 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
+ [NeMo I 2026-04-13 18:49:42 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
+ [NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
+ Parameter Group 0
+     amsgrad: False
+     betas: (0.9, 0.98)
+     capturable: False
+     decoupled_weight_decay: True
+     differentiable: False
+     eps: 1e-08
+     foreach: None
+     fused: None
+     lr: 2e-05
+     maximize: False
+     weight_decay: 0.001
+ )
+ [NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f9f5130a780>"
+ will be used during training (effective maximum steps = 16000) -
+ Parameters :
+ (warmup_steps: 2500
+ warmup_ratio: null
+ min_lr: 1.0e-06
+ max_steps: 16000
+ )
+ [NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453328.
+ [NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900246.
+ [NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641915.
+ [NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533032.
+ [NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908696.
+ [NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0131009.
+ [NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344163.
+ [NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732656.
+ [NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425637.
+ [NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947732.
+ [NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5715966.
openflam_sortformer_train/nemo_log_globalrank-2_localrank-2.txt ADDED
@@ -0,0 +1,245 @@
+ [NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
+ num_workers: 1
+ batch_size: 24
+ model:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: true
+     num_workers: ${num_workers}
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: ${model.max_num_of_spks}
+     dropout_rate: 0.1
+     fc_d_model: ${model.model_defaults.fc_d_model}
+     tf_d_model: ${model.model_defaults.tf_d_model}
+     subsampling_factor: 1
+   encoder:
+     d_model: ${model.model_defaults.fc_d_model}
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: ${model.model_defaults.tf_d_model}
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: ${model.lr}
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06
+ trainer:
+   devices: 8
+   accelerator: gpu
+   precision: bf16-mixed
+   max_epochs: -1
+   max_steps: 16000
+   num_nodes: 1
+   strategy: ddp_find_unused_parameters_true
+   accumulate_grad_batches: 1
+   deterministic: false
+   enable_checkpointing: false
+   logger: false
+   log_every_n_steps: 1
+   val_check_interval: 2000
+   num_sanity_val_steps: 0
+ exp_manager:
+   use_datetime_version: false
+   exp_dir: ./openflam_sortformer_train
+   name: openflam_sortformer_train
+   resume_if_exists: true
+   resume_from_checkpoint: null
+   resume_ignore_no_checkpoint: true
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   create_wandb_logger: false
+   checkpoint_callback_params:
+     monitor: val_der
+     mode: min
+     save_top_k: 5
+     every_n_train_steps: 2000
+     every_n_epochs: 0
+   wandb_logger_kwargs:
+     resume: true
+     name: null
+     project: null
+
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
+ [NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
+ [NeMo I 2026-04-13 18:49:42 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
+ [NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
+ Parameter Group 0
+     amsgrad: False
+     betas: (0.9, 0.98)
+     capturable: False
+     decoupled_weight_decay: True
+     differentiable: False
+     eps: 1e-08
+     foreach: None
+     fused: None
+     lr: 2e-05
+     maximize: False
+     weight_decay: 0.001
+ )
+ [NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7fadd270ede0>"
+ will be used during training (effective maximum steps = 16000) -
+ Parameters :
+ (warmup_steps: 2500
+ warmup_ratio: null
+ min_lr: 1.0e-06
+ max_steps: 16000
+ )
+ [NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.445336.
+ [NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900196.
+ [NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641746.
+ [NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533016.
+ [NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908696.
+ [NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.013098.
+ [NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344163.
+ [NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.073263.
+ [NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425504.
+ [NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947706.
+ [NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.57159.
openflam_sortformer_train/nemo_log_globalrank-3_localrank-3.txt ADDED
@@ -0,0 +1,245 @@
+ [NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
+ num_workers: 1
+ batch_size: 24
+ model:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: true
+     num_workers: ${num_workers}
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: ${model.max_num_of_spks}
+     dropout_rate: 0.1
+     fc_d_model: ${model.model_defaults.fc_d_model}
+     tf_d_model: ${model.model_defaults.tf_d_model}
+     subsampling_factor: 1
+   encoder:
+     d_model: ${model.model_defaults.fc_d_model}
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: ${model.model_defaults.tf_d_model}
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: ${model.lr}
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06
+ trainer:
+   devices: 8
+   accelerator: gpu
+   precision: bf16-mixed
+   max_epochs: -1
+   max_steps: 16000
+   num_nodes: 1
+   strategy: ddp_find_unused_parameters_true
+   accumulate_grad_batches: 1
+   deterministic: false
+   enable_checkpointing: false
+   logger: false
+   log_every_n_steps: 1
+   val_check_interval: 2000
+   num_sanity_val_steps: 0
+ exp_manager:
+   use_datetime_version: false
+   exp_dir: ./openflam_sortformer_train
+   name: openflam_sortformer_train
+   resume_if_exists: true
+   resume_from_checkpoint: null
+   resume_ignore_no_checkpoint: true
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   create_wandb_logger: false
+   checkpoint_callback_params:
+     monitor: val_der
+     mode: min
+     save_top_k: 5
+     every_n_train_steps: 2000
+     every_n_epochs: 0
+   wandb_logger_kwargs:
+     resume: true
+     name: null
+     project: null
+
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
+ [NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
+ [NeMo I 2026-04-13 18:49:40 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
+ [NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
+ Parameter Group 0
+     amsgrad: False
+     betas: (0.9, 0.98)
+     capturable: False
+     decoupled_weight_decay: True
+     differentiable: False
+     eps: 1e-08
+     foreach: None
+     fused: None
+     lr: 2e-05
+     maximize: False
+     weight_decay: 0.001
+ )
+ [NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f2ab4b2f1d0>"
+ will be used during training (effective maximum steps = 16000) -
+ Parameters :
+ (warmup_steps: 2500
+ warmup_ratio: null
+ min_lr: 1.0e-06
+ max_steps: 16000
+ )
+ [NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.445329.
+ [NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900208.
+ [NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641736.
+ [NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533004.
+ [NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908863.
+ [NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0130968.
+ [NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344135.
+ [NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732584.
+ [NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425501.
+ [NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.89477.
+ [NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5715883.
openflam_sortformer_train/nemo_log_globalrank-4_localrank-4.txt ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ [NeMo I 2026-04-13 18:49:23 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
+ num_workers: 1
+ batch_size: 24
+ model:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: true
+     num_workers: ${num_workers}
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: ${model.max_num_of_spks}
+     dropout_rate: 0.1
+     fc_d_model: ${model.model_defaults.fc_d_model}
+     tf_d_model: ${model.model_defaults.tf_d_model}
+     subsampling_factor: 1
+   encoder:
+     d_model: ${model.model_defaults.fc_d_model}
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: ${model.model_defaults.tf_d_model}
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: ${model.lr}
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06
+ trainer:
+   devices: 8
+   accelerator: gpu
+   precision: bf16-mixed
+   max_epochs: -1
+   max_steps: 16000
+   num_nodes: 1
+   strategy: ddp_find_unused_parameters_true
+   accumulate_grad_batches: 1
+   deterministic: false
+   enable_checkpointing: false
+   logger: false
+   log_every_n_steps: 1
+   val_check_interval: 2000
+   num_sanity_val_steps: 0
+ exp_manager:
+   use_datetime_version: false
+   exp_dir: ./openflam_sortformer_train
+   name: openflam_sortformer_train
+   resume_if_exists: true
+   resume_from_checkpoint: null
+   resume_ignore_no_checkpoint: true
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   create_wandb_logger: false
+   checkpoint_callback_params:
+     monitor: val_der
+     mode: min
+     save_top_k: 5
+     every_n_train_steps: 2000
+     every_n_epochs: 0
+   wandb_logger_kwargs:
+     resume: true
+     name: null
+     project: null
+ 
+ [NeMo W 2026-04-13 18:49:23 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:49:23 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo I 2026-04-13 18:49:23 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
+ [NeMo I 2026-04-13 18:49:23 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo W 2026-04-13 18:49:23 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-13 18:49:23 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo W 2026-04-13 18:49:28 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
+ [NeMo I 2026-04-13 18:49:42 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
+ [NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
+ Parameter Group 0
+ amsgrad: False
+ betas: (0.9, 0.98)
+ capturable: False
+ decoupled_weight_decay: True
+ differentiable: False
+ eps: 1e-08
+ foreach: None
+ fused: None
+ lr: 2e-05
+ maximize: False
+ weight_decay: 0.001
+ )
+ [NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7fa4985c0560>"
+ will be used during training (effective maximum steps = 16000) -
+ Parameters :
+ (warmup_steps: 2500
+ warmup_ratio: null
+ min_lr: 1.0e-06
+ max_steps: 16000
+ )
+ [NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453347.
+ [NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900336.
+ [NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641777.
+ [NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533013.
+ [NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.190873.
+ [NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.013099.
+ [NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344116.
+ [NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.073265.
+ [NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425628.
+ [NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947723.
+ [NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.571588.
openflam_sortformer_train/nemo_log_globalrank-5_localrank-5.txt ADDED
@@ -0,0 +1,245 @@
+ [NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
+ num_workers: 1
+ batch_size: 24
+ model:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: true
+     num_workers: ${num_workers}
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: ${model.max_num_of_spks}
+     dropout_rate: 0.1
+     fc_d_model: ${model.model_defaults.fc_d_model}
+     tf_d_model: ${model.model_defaults.tf_d_model}
+     subsampling_factor: 1
+   encoder:
+     d_model: ${model.model_defaults.fc_d_model}
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: ${model.model_defaults.tf_d_model}
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: ${model.lr}
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06
+ trainer:
+   devices: 8
+   accelerator: gpu
+   precision: bf16-mixed
+   max_epochs: -1
+   max_steps: 16000
+   num_nodes: 1
+   strategy: ddp_find_unused_parameters_true
+   accumulate_grad_batches: 1
+   deterministic: false
+   enable_checkpointing: false
+   logger: false
+   log_every_n_steps: 1
+   val_check_interval: 2000
+   num_sanity_val_steps: 0
+ exp_manager:
+   use_datetime_version: false
+   exp_dir: ./openflam_sortformer_train
+   name: openflam_sortformer_train
+   resume_if_exists: true
+   resume_from_checkpoint: null
+   resume_ignore_no_checkpoint: true
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   create_wandb_logger: false
+   checkpoint_callback_params:
+     monitor: val_der
+     mode: min
+     save_top_k: 5
+     every_n_train_steps: 2000
+     every_n_epochs: 0
+   wandb_logger_kwargs:
+     resume: true
+     name: null
+     project: null
+ 
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
+ [NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
+ [NeMo I 2026-04-13 18:49:38 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
+ [NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
+ Parameter Group 0
+ amsgrad: False
+ betas: (0.9, 0.98)
+ capturable: False
+ decoupled_weight_decay: True
+ differentiable: False
+ eps: 1e-08
+ foreach: None
+ fused: None
+ lr: 2e-05
+ maximize: False
+ weight_decay: 0.001
+ )
+ [NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f1f5bc1b6e0>"
+ will be used during training (effective maximum steps = 16000) -
+ Parameters :
+ (warmup_steps: 2500
+ warmup_ratio: null
+ min_lr: 1.0e-06
+ max_steps: 16000
+ )
+ [NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453282.
+ [NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900138.
+ [NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.164175.
+ [NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533056.
+ [NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908715.
+ [NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0131006.
+ [NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344132.
+ [NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.073263.
+ [NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425544.
+ [NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947694.
+ [NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5715895.
openflam_sortformer_train/nemo_log_globalrank-6_localrank-6.txt ADDED
@@ -0,0 +1,245 @@
+ [NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
+ num_workers: 1
+ batch_size: 24
+ model:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: true
+     num_workers: ${num_workers}
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: ${model.max_num_of_spks}
+     dropout_rate: 0.1
+     fc_d_model: ${model.model_defaults.fc_d_model}
+     tf_d_model: ${model.model_defaults.tf_d_model}
+     subsampling_factor: 1
+   encoder:
+     d_model: ${model.model_defaults.fc_d_model}
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: ${model.model_defaults.tf_d_model}
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: ${model.lr}
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06
+ trainer:
+   devices: 8
+   accelerator: gpu
+   precision: bf16-mixed
+   max_epochs: -1
+   max_steps: 16000
+   num_nodes: 1
+   strategy: ddp_find_unused_parameters_true
+   accumulate_grad_batches: 1
+   deterministic: false
+   enable_checkpointing: false
+   logger: false
+   log_every_n_steps: 1
+   val_check_interval: 2000
+   num_sanity_val_steps: 0
+ exp_manager:
+   use_datetime_version: false
+   exp_dir: ./openflam_sortformer_train
+   name: openflam_sortformer_train
+   resume_if_exists: true
+   resume_from_checkpoint: null
+   resume_ignore_no_checkpoint: true
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   create_wandb_logger: false
+   checkpoint_callback_params:
+     monitor: val_der
+     mode: min
+     save_top_k: 5
+     every_n_train_steps: 2000
+     every_n_epochs: 0
+   wandb_logger_kwargs:
+     resume: true
+     name: null
+     project: null
+ 
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
+ [NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
+ [NeMo I 2026-04-13 18:49:39 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
+ [NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
+ Parameter Group 0
+ amsgrad: False
+ betas: (0.9, 0.98)
+ capturable: False
+ decoupled_weight_decay: True
+ differentiable: False
+ eps: 1e-08
+ foreach: None
+ fused: None
+ lr: 2e-05
+ maximize: False
+ weight_decay: 0.001
+ )
+ [NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f3350a98bf0>"
+ will be used during training (effective maximum steps = 16000) -
+ Parameters :
+ (warmup_steps: 2500
+ warmup_ratio: null
+ min_lr: 1.0e-06
+ max_steps: 16000
+ )
+ [NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453306.
+ [NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.490022.
+ [NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.16418.
+ [NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533128.
+ [NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.190873.
+ [NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0130994.
+ [NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.634412.
+ [NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732641.
+ [NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425518.
+ [NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947701.
+ [NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5715954.
openflam_sortformer_train/nemo_log_globalrank-7_localrank-7.txt ADDED
@@ -0,0 +1,245 @@
+ [NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
+ num_workers: 1
+ batch_size: 24
+ model:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: true
+     num_workers: ${num_workers}
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: ${model.sample_rate}
+     num_spks: ${model.max_num_of_spks}
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: ${batch_size}
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: ${num_workers}
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: ${model.max_num_of_spks}
+     dropout_rate: 0.1
+     fc_d_model: ${model.model_defaults.fc_d_model}
+     tf_d_model: ${model.model_defaults.tf_d_model}
+     subsampling_factor: 1
+   encoder:
+     d_model: ${model.model_defaults.fc_d_model}
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: ${model.model_defaults.tf_d_model}
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: ${model.lr}
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06
+ trainer:
+   devices: 8
+   accelerator: gpu
+   precision: bf16-mixed
+   max_epochs: -1
+   max_steps: 16000
+   num_nodes: 1
+   strategy: ddp_find_unused_parameters_true
+   accumulate_grad_batches: 1
+   deterministic: false
+   enable_checkpointing: false
+   logger: false
+   log_every_n_steps: 1
+   val_check_interval: 2000
+   num_sanity_val_steps: 0
+ exp_manager:
+   use_datetime_version: false
+   exp_dir: ./openflam_sortformer_train
+   name: openflam_sortformer_train
+   resume_if_exists: true
+   resume_from_checkpoint: null
+   resume_ignore_no_checkpoint: true
+   create_tensorboard_logger: true
+   create_checkpoint_callback: true
+   create_wandb_logger: false
+   checkpoint_callback_params:
+     monitor: val_der
+     mode: min
+     save_top_k: 5
+     every_n_train_steps: 2000
+     every_n_epochs: 0
+   wandb_logger_kwargs:
+     resume: true
+     name: null
+     project: null
+ 
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
+ [NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
+ [NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
+ [NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
+ [NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
+ [NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
+ [NeMo I 2026-04-13 18:49:38 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
+ [NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
+ Parameter Group 0
+ amsgrad: False
+ betas: (0.9, 0.98)
+ capturable: False
+ decoupled_weight_decay: True
+ differentiable: False
+ eps: 1e-08
+ foreach: None
+ fused: None
+ lr: 2e-05
+ maximize: False
+ weight_decay: 0.001
+ )
+ [NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f82856aaba0>"
+ will be used during training (effective maximum steps = 16000) -
+ Parameters :
+ (warmup_steps: 2500
+ warmup_ratio: null
+ min_lr: 1.0e-06
+ max_steps: 16000
+ )
+ [NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453285.
+ [NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900186.
+ [NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641943.
+ [NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533047.
+ [NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908693.
+ [NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.013106.
+ [NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344168.
+ [NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732667.
+ [NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425685.
+ [NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947742.
+ [NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.571593.
openflam_sortformer_train/version_0/events.out.tfevents.1776106183.aa77be2546cc.6641.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6b012681cc9b2db7e02c55703a59a4e2907b43281c8fc25798c0fc1cf62e14bb
+ size 7667179
openflam_sortformer_train/version_0/hparams.yaml ADDED
@@ -0,0 +1,162 @@
+ cfg:
+   sample_rate: 48000
+   pil_weight: 0.5
+   ats_weight: 0.5
+   max_num_of_spks: 4
+   openflam_model_name: v1-base
+   openflam_pretrained: true
+   openflam_freeze_layers: 0
+   openflam_ckpt_cache_dir: /tmp/openflam
+   rttm_unit_10ms_frame_count: 31
+   der_collar: 0.25
+   der_ignore_overlap: true
+   model_defaults:
+     fc_d_model: 1024
+     tf_d_model: 192
+   train_ds:
+     manifest_filepath: null
+     sample_rate: 48000
+     num_spks: 4
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: 24
+     shuffle: true
+     num_workers: 1
+     validation_mode: false
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - CHiME6
+     - Dipco
+     - ICSI
+     - M3SD
+     - NOTSOFAR
+     - aishell4
+     - aishell5
+     - alimeeting
+     - ami_ihm
+     - ami_sdm
+     - callhome
+     - msdwild
+     - voxconverse
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 4
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 8
+     use_lhotse: false
+     use_bucketing: true
+     num_buckets: 10
+     bucket_duration_bins:
+     - 10
+     - 20
+     - 30
+     - 40
+     - 50
+     - 60
+     - 70
+     - 80
+     - 90
+     pin_memory: true
+     min_duration: 10
+     max_duration: 90
+     batch_duration: 400
+     quadratic_duration: 1200
+     bucket_buffer_size: 20000
+     window_stride: 0.3125
+     subsampling_factor: 1
+   validation_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: 48000
+     num_spks: 4
+     session_len_sec: 45
+     shift_sec: 4
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: 24
+     shuffle: false
+     num_workers: 1
+     validation_mode: true
+     use_hf_streaming: true
+     hf_dataset_path: humanify/real_dia_dataset
+     hf_configs:
+     - alm_benchmark
+     hf_split: train
+     shuffle_seed: 42
+     shuffle_buffer_size: 100
+     prefetch_factor: 4
+     persistent_workers: true
+     prefetch_rows: 4
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   test_ds:
+     manifest_filepath: null
+     is_tarred: false
+     tarred_audio_filepaths: null
+     sample_rate: 48000
+     num_spks: 4
+     session_len_sec: 90
+     soft_label_thres: 0.5
+     soft_targets: false
+     labels: null
+     batch_size: 24
+     shuffle: false
+     seq_eval_mode: true
+     num_workers: 1
+     validation_mode: true
+     use_lhotse: false
+     use_bucketing: false
+     drop_last: false
+     pin_memory: true
+     window_stride: 0.3125
+     subsampling_factor: 1
+   sortformer_modules:
+     _target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
+     num_spks: 4
+     dropout_rate: 0.1
+     fc_d_model: 1024
+     tf_d_model: 192
+     subsampling_factor: 1
+   encoder:
+     d_model: 1024
+     subsampling_factor: 1
+   transformer_encoder:
+     _target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
+     num_layers: 18
+     hidden_size: 192
+     inner_size: 768
+     num_attention_heads: 8
+     attn_score_dropout: 0.5
+     attn_layer_dropout: 0.5
+     ffn_dropout: 0.5
+     hidden_act: relu
+     pre_ln: false
+     pre_ln_final_layer_norm: true
+   loss:
+     _target_: nemo.collections.asr.losses.bce_loss.BCELoss
+     weight: null
+     reduction: mean
+   lr: 2.0e-05
+   optim:
+     name: adamw
+     lr: 2.0e-05
+     betas:
+     - 0.9
+     - 0.98
+     weight_decay: 0.001
+     sched:
+       name: InverseSquareRootAnnealing
+       warmup_steps: 2500
+       warmup_ratio: null
+       min_lr: 1.0e-06