Upload folder using huggingface_hub
Browse files- openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5475-epoch=0-last.ckpt +3 -0
- openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5475-epoch=0.ckpt +3 -0
- openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5632-epoch=0.ckpt +3 -0
- openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5934-epoch=0.ckpt +3 -0
- openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.9860-epoch=0.ckpt +3 -0
- openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.9965-epoch=0.ckpt +3 -0
- openflam_sortformer_train/cmd-args.log +1 -0
- openflam_sortformer_train/git-info.log +13 -0
- openflam_sortformer_train/lightning_logs.txt +26 -0
- openflam_sortformer_train/nemo_error_log.txt +8 -0
- openflam_sortformer_train/nemo_log_globalrank-0_localrank-0.txt +251 -0
- openflam_sortformer_train/nemo_log_globalrank-1_localrank-1.txt +245 -0
- openflam_sortformer_train/nemo_log_globalrank-2_localrank-2.txt +245 -0
- openflam_sortformer_train/nemo_log_globalrank-3_localrank-3.txt +245 -0
- openflam_sortformer_train/nemo_log_globalrank-4_localrank-4.txt +245 -0
- openflam_sortformer_train/nemo_log_globalrank-5_localrank-5.txt +245 -0
- openflam_sortformer_train/nemo_log_globalrank-6_localrank-6.txt +245 -0
- openflam_sortformer_train/nemo_log_globalrank-7_localrank-7.txt +245 -0
- openflam_sortformer_train/version_0/events.out.tfevents.1776106183.aa77be2546cc.6641.0 +3 -0
- openflam_sortformer_train/version_0/hparams.yaml +162 -0
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5475-epoch=0-last.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:983df478cabe165ab622a9ee0d94cd07981c1a4bedf8abbafe3903554d9a8d9b
|
| 3 |
+
size 920132478
|
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5475-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ee2ac83b80a349a0950950419f2084f3529d1bb086c4a32ddd524f36d360eede
|
| 3 |
+
size 920132478
|
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5632-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0c048f1c92fe4593462fcd82c7dd958147b92f1274db9306de6dbcc43cd18c46
|
| 3 |
+
size 920132286
|
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.5934-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b63d1d860ac8dd26bea4f25705e02425c83cbf1808801a76819b30c354f8beed
|
| 3 |
+
size 920131839
|
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.9860-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c19d6b2d2b344445596915b774cfed53da4ae89f78df0c3c77b1f74e42e34c62
|
| 3 |
+
size 920131456
|
openflam_sortformer_train/checkpoints/openflam_sortformer_train--val_der=0.9965-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb14be6ffe43bc10ba930914e66471d825201cb53e72fbbd4f3d8719d1083e05
|
| 3 |
+
size 920131009
|
openflam_sortformer_train/cmd-args.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
scripts/openflam_sortformer_train.py --config-path=../configs --config-name=openflam_sortformer_4spk.yaml exp_manager.name=openflam_sortformer_train exp_manager.exp_dir=./openflam_sortformer_train
|
openflam_sortformer_train/git-info.log
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
commit hash: 5ecc0a3c86af359b2f459bbb39f69a6fefc4bc40
|
| 2 |
+
diff --git a/.gitignore b/.gitignore
|
| 3 |
+
index 982a755..0707177 100644
|
| 4 |
+
--- a/.gitignore
|
| 5 |
+
+++ b/.gitignore
|
| 6 |
+
@@ -10,6 +10,7 @@ __pycache__/
|
| 7 |
+
peav_sortformer_train/
|
| 8 |
+
dasheng_sortformer_train/
|
| 9 |
+
peav_sortformer_v2_train
|
| 10 |
+
+openflam_sortformer_train/
|
| 11 |
+
|
| 12 |
+
debug_hf_training_window/
|
| 13 |
+
|
openflam_sortformer_train/lightning_logs.txt
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 2 |
+
|
| 3 |
+
| Name | Type | Params | Mode
|
| 4 |
+
--------------------------------------------------------------------------------
|
| 5 |
+
0 | htsat_spectrogram_extractor | Spectrogram | 1.1 M | train
|
| 6 |
+
1 | htsat_logmel_extractor | LogmelFilterBank | 32.8 K | train
|
| 7 |
+
2 | htsat_bn0 | BatchNorm2d | 128 | eval
|
| 8 |
+
3 | htsat_patch_embed | PatchEmbed | 2.4 K | train
|
| 9 |
+
4 | htsat_pos_drop | Dropout | 0 | train
|
| 10 |
+
5 | encoder | OpenFLAMEncoderWrapper | 67.8 M | train
|
| 11 |
+
6 | sortformer_modules | SortformerModules | 236 K | train
|
| 12 |
+
7 | transformer_encoder | TransformerEncoder | 8.0 M | train
|
| 13 |
+
8 | loss | BCELoss | 0 | train
|
| 14 |
+
9 | _accuracy_test | MultiBinaryAccuracy | 0 | train
|
| 15 |
+
10 | _accuracy_train | MultiBinaryAccuracy | 0 | train
|
| 16 |
+
11 | _accuracy_valid | MultiBinaryAccuracy | 0 | train
|
| 17 |
+
12 | _accuracy_test_ats | MultiBinaryAccuracy | 0 | train
|
| 18 |
+
13 | _accuracy_train_ats | MultiBinaryAccuracy | 0 | train
|
| 19 |
+
14 | _accuracy_valid_ats | MultiBinaryAccuracy | 0 | train
|
| 20 |
+
--------------------------------------------------------------------------------
|
| 21 |
+
76.1 M Trainable params
|
| 22 |
+
1.1 M Non-trainable params
|
| 23 |
+
77.2 M Total params
|
| 24 |
+
308.606 Total estimated model params size (MB)
|
| 25 |
+
567 Modules in train mode
|
| 26 |
+
1 Modules in eval mode
|
openflam_sortformer_train/nemo_error_log.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo W 2026-04-13 18:48:50 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 2 |
+
[NeMo W 2026-04-13 18:48:50 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 3 |
+
[NeMo W 2026-04-13 18:48:50 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 4 |
+
[NeMo W 2026-04-13 18:48:50 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 5 |
+
[NeMo W 2026-04-13 18:49:43 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
|
| 6 |
+
|
| 7 |
+
[NeMo W 2026-04-13 18:50:30 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
|
| 8 |
+
|
openflam_sortformer_train/nemo_log_globalrank-0_localrank-0.txt
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 18:48:50 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
openflam_model_name: v1-base
|
| 10 |
+
openflam_pretrained: true
|
| 11 |
+
openflam_freeze_layers: 0
|
| 12 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 13 |
+
rttm_unit_10ms_frame_count: 31
|
| 14 |
+
der_collar: 0.25
|
| 15 |
+
der_ignore_overlap: true
|
| 16 |
+
model_defaults:
|
| 17 |
+
fc_d_model: 1024
|
| 18 |
+
tf_d_model: 192
|
| 19 |
+
train_ds:
|
| 20 |
+
manifest_filepath: null
|
| 21 |
+
sample_rate: ${model.sample_rate}
|
| 22 |
+
num_spks: ${model.max_num_of_spks}
|
| 23 |
+
session_len_sec: 45
|
| 24 |
+
shift_sec: 4
|
| 25 |
+
soft_label_thres: 0.5
|
| 26 |
+
soft_targets: false
|
| 27 |
+
labels: null
|
| 28 |
+
batch_size: ${batch_size}
|
| 29 |
+
shuffle: true
|
| 30 |
+
num_workers: ${num_workers}
|
| 31 |
+
validation_mode: false
|
| 32 |
+
use_hf_streaming: true
|
| 33 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 34 |
+
hf_configs:
|
| 35 |
+
- CHiME6
|
| 36 |
+
- Dipco
|
| 37 |
+
- ICSI
|
| 38 |
+
- M3SD
|
| 39 |
+
- NOTSOFAR
|
| 40 |
+
- aishell4
|
| 41 |
+
- aishell5
|
| 42 |
+
- alimeeting
|
| 43 |
+
- ami_ihm
|
| 44 |
+
- ami_sdm
|
| 45 |
+
- callhome
|
| 46 |
+
- msdwild
|
| 47 |
+
- voxconverse
|
| 48 |
+
hf_split: train
|
| 49 |
+
shuffle_seed: 42
|
| 50 |
+
shuffle_buffer_size: 4
|
| 51 |
+
prefetch_factor: 4
|
| 52 |
+
persistent_workers: true
|
| 53 |
+
prefetch_rows: 8
|
| 54 |
+
use_lhotse: false
|
| 55 |
+
use_bucketing: true
|
| 56 |
+
num_buckets: 10
|
| 57 |
+
bucket_duration_bins:
|
| 58 |
+
- 10
|
| 59 |
+
- 20
|
| 60 |
+
- 30
|
| 61 |
+
- 40
|
| 62 |
+
- 50
|
| 63 |
+
- 60
|
| 64 |
+
- 70
|
| 65 |
+
- 80
|
| 66 |
+
- 90
|
| 67 |
+
pin_memory: true
|
| 68 |
+
min_duration: 10
|
| 69 |
+
max_duration: 90
|
| 70 |
+
batch_duration: 400
|
| 71 |
+
quadratic_duration: 1200
|
| 72 |
+
bucket_buffer_size: 20000
|
| 73 |
+
window_stride: 0.3125
|
| 74 |
+
subsampling_factor: 1
|
| 75 |
+
validation_ds:
|
| 76 |
+
manifest_filepath: null
|
| 77 |
+
is_tarred: false
|
| 78 |
+
tarred_audio_filepaths: null
|
| 79 |
+
sample_rate: ${model.sample_rate}
|
| 80 |
+
num_spks: ${model.max_num_of_spks}
|
| 81 |
+
session_len_sec: 45
|
| 82 |
+
shift_sec: 4
|
| 83 |
+
soft_label_thres: 0.5
|
| 84 |
+
soft_targets: false
|
| 85 |
+
labels: null
|
| 86 |
+
batch_size: ${batch_size}
|
| 87 |
+
shuffle: false
|
| 88 |
+
num_workers: ${num_workers}
|
| 89 |
+
validation_mode: true
|
| 90 |
+
use_hf_streaming: true
|
| 91 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 92 |
+
hf_configs:
|
| 93 |
+
- alm_benchmark
|
| 94 |
+
hf_split: train
|
| 95 |
+
shuffle_seed: 42
|
| 96 |
+
shuffle_buffer_size: 100
|
| 97 |
+
prefetch_factor: 4
|
| 98 |
+
persistent_workers: true
|
| 99 |
+
prefetch_rows: 4
|
| 100 |
+
use_lhotse: false
|
| 101 |
+
use_bucketing: false
|
| 102 |
+
drop_last: false
|
| 103 |
+
pin_memory: true
|
| 104 |
+
window_stride: 0.3125
|
| 105 |
+
subsampling_factor: 1
|
| 106 |
+
test_ds:
|
| 107 |
+
manifest_filepath: null
|
| 108 |
+
is_tarred: false
|
| 109 |
+
tarred_audio_filepaths: null
|
| 110 |
+
sample_rate: ${model.sample_rate}
|
| 111 |
+
num_spks: ${model.max_num_of_spks}
|
| 112 |
+
session_len_sec: 90
|
| 113 |
+
soft_label_thres: 0.5
|
| 114 |
+
soft_targets: false
|
| 115 |
+
labels: null
|
| 116 |
+
batch_size: ${batch_size}
|
| 117 |
+
shuffle: false
|
| 118 |
+
seq_eval_mode: true
|
| 119 |
+
num_workers: ${num_workers}
|
| 120 |
+
validation_mode: true
|
| 121 |
+
use_lhotse: false
|
| 122 |
+
use_bucketing: false
|
| 123 |
+
drop_last: false
|
| 124 |
+
pin_memory: true
|
| 125 |
+
window_stride: 0.3125
|
| 126 |
+
subsampling_factor: 1
|
| 127 |
+
sortformer_modules:
|
| 128 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 129 |
+
num_spks: ${model.max_num_of_spks}
|
| 130 |
+
dropout_rate: 0.1
|
| 131 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 132 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
encoder:
|
| 135 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 136 |
+
subsampling_factor: 1
|
| 137 |
+
transformer_encoder:
|
| 138 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 139 |
+
num_layers: 18
|
| 140 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 141 |
+
inner_size: 768
|
| 142 |
+
num_attention_heads: 8
|
| 143 |
+
attn_score_dropout: 0.5
|
| 144 |
+
attn_layer_dropout: 0.5
|
| 145 |
+
ffn_dropout: 0.5
|
| 146 |
+
hidden_act: relu
|
| 147 |
+
pre_ln: false
|
| 148 |
+
pre_ln_final_layer_norm: true
|
| 149 |
+
loss:
|
| 150 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 151 |
+
weight: null
|
| 152 |
+
reduction: mean
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
optim:
|
| 155 |
+
name: adamw
|
| 156 |
+
lr: ${model.lr}
|
| 157 |
+
betas:
|
| 158 |
+
- 0.9
|
| 159 |
+
- 0.98
|
| 160 |
+
weight_decay: 0.001
|
| 161 |
+
sched:
|
| 162 |
+
name: InverseSquareRootAnnealing
|
| 163 |
+
warmup_steps: 2500
|
| 164 |
+
warmup_ratio: null
|
| 165 |
+
min_lr: 1.0e-06
|
| 166 |
+
trainer:
|
| 167 |
+
devices: 8
|
| 168 |
+
accelerator: gpu
|
| 169 |
+
precision: bf16-mixed
|
| 170 |
+
max_epochs: -1
|
| 171 |
+
max_steps: 16000
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
strategy: ddp_find_unused_parameters_true
|
| 174 |
+
accumulate_grad_batches: 1
|
| 175 |
+
deterministic: false
|
| 176 |
+
enable_checkpointing: false
|
| 177 |
+
logger: false
|
| 178 |
+
log_every_n_steps: 1
|
| 179 |
+
val_check_interval: 2000
|
| 180 |
+
num_sanity_val_steps: 0
|
| 181 |
+
exp_manager:
|
| 182 |
+
use_datetime_version: false
|
| 183 |
+
exp_dir: ./openflam_sortformer_train
|
| 184 |
+
name: openflam_sortformer_train
|
| 185 |
+
resume_if_exists: true
|
| 186 |
+
resume_from_checkpoint: null
|
| 187 |
+
resume_ignore_no_checkpoint: true
|
| 188 |
+
create_tensorboard_logger: true
|
| 189 |
+
create_checkpoint_callback: true
|
| 190 |
+
create_wandb_logger: false
|
| 191 |
+
checkpoint_callback_params:
|
| 192 |
+
monitor: val_der
|
| 193 |
+
mode: min
|
| 194 |
+
save_top_k: 5
|
| 195 |
+
every_n_train_steps: 2000
|
| 196 |
+
every_n_epochs: 0
|
| 197 |
+
wandb_logger_kwargs:
|
| 198 |
+
resume: true
|
| 199 |
+
name: null
|
| 200 |
+
project: null
|
| 201 |
+
|
| 202 |
+
[NeMo I 2026-04-13 18:48:50 exp_manager:594] ExpManager schema
|
| 203 |
+
[NeMo I 2026-04-13 18:48:50 exp_manager:595] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'run_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1}, 'create_early_stopping_callback': False, 'create_ipl_epoch_stopper_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'ipl_epoch_stopper_callback_params': {'enable_stop': True, 'stop_every_n_epochs': 1}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
|
| 204 |
+
[NeMo W 2026-04-13 18:48:50 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 205 |
+
[NeMo W 2026-04-13 18:48:50 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 206 |
+
[NeMo I 2026-04-13 18:48:50 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
|
| 207 |
+
[NeMo I 2026-04-13 18:48:50 exp_manager:1262] TensorboardLogger has been set up
|
| 208 |
+
[NeMo W 2026-04-13 18:48:50 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 209 |
+
[NeMo I 2026-04-13 18:48:50 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 210 |
+
[NeMo I 2026-04-13 18:48:50 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 211 |
+
[NeMo I 2026-04-13 18:48:50 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 212 |
+
[NeMo W 2026-04-13 18:48:50 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 213 |
+
[NeMo I 2026-04-13 18:48:50 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
|
| 214 |
+
[NeMo I 2026-04-13 18:49:11 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
|
| 215 |
+
[NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
|
| 216 |
+
Parameter Group 0
|
| 217 |
+
amsgrad: False
|
| 218 |
+
betas: (0.9, 0.98)
|
| 219 |
+
capturable: False
|
| 220 |
+
decoupled_weight_decay: True
|
| 221 |
+
differentiable: False
|
| 222 |
+
eps: 1e-08
|
| 223 |
+
foreach: None
|
| 224 |
+
fused: None
|
| 225 |
+
lr: 2e-05
|
| 226 |
+
maximize: False
|
| 227 |
+
weight_decay: 0.001
|
| 228 |
+
)
|
| 229 |
+
[NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7fd2ee2d5940>"
|
| 230 |
+
will be used during training (effective maximum steps = 16000) -
|
| 231 |
+
Parameters :
|
| 232 |
+
(warmup_steps: 2500
|
| 233 |
+
warmup_ratio: null
|
| 234 |
+
min_lr: 1.0e-06
|
| 235 |
+
max_steps: 16000
|
| 236 |
+
)
|
| 237 |
+
[NeMo W 2026-04-13 18:49:43 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
|
| 238 |
+
|
| 239 |
+
[NeMo W 2026-04-13 18:50:30 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
|
| 240 |
+
|
| 241 |
+
[NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453387.
|
| 242 |
+
[NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900334.
|
| 243 |
+
[NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641889.
|
| 244 |
+
[NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533197.
|
| 245 |
+
[NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908803.
|
| 246 |
+
[NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0131054.
|
| 247 |
+
[NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344173.
|
| 248 |
+
[NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732732.
|
| 249 |
+
[NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.742586.
|
| 250 |
+
[NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947825.
|
| 251 |
+
[NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5716007.
|
openflam_sortformer_train/nemo_log_globalrank-1_localrank-1.txt
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 18:49:23 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
openflam_model_name: v1-base
|
| 10 |
+
openflam_pretrained: true
|
| 11 |
+
openflam_freeze_layers: 0
|
| 12 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 13 |
+
rttm_unit_10ms_frame_count: 31
|
| 14 |
+
der_collar: 0.25
|
| 15 |
+
der_ignore_overlap: true
|
| 16 |
+
model_defaults:
|
| 17 |
+
fc_d_model: 1024
|
| 18 |
+
tf_d_model: 192
|
| 19 |
+
train_ds:
|
| 20 |
+
manifest_filepath: null
|
| 21 |
+
sample_rate: ${model.sample_rate}
|
| 22 |
+
num_spks: ${model.max_num_of_spks}
|
| 23 |
+
session_len_sec: 45
|
| 24 |
+
shift_sec: 4
|
| 25 |
+
soft_label_thres: 0.5
|
| 26 |
+
soft_targets: false
|
| 27 |
+
labels: null
|
| 28 |
+
batch_size: ${batch_size}
|
| 29 |
+
shuffle: true
|
| 30 |
+
num_workers: ${num_workers}
|
| 31 |
+
validation_mode: false
|
| 32 |
+
use_hf_streaming: true
|
| 33 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 34 |
+
hf_configs:
|
| 35 |
+
- CHiME6
|
| 36 |
+
- Dipco
|
| 37 |
+
- ICSI
|
| 38 |
+
- M3SD
|
| 39 |
+
- NOTSOFAR
|
| 40 |
+
- aishell4
|
| 41 |
+
- aishell5
|
| 42 |
+
- alimeeting
|
| 43 |
+
- ami_ihm
|
| 44 |
+
- ami_sdm
|
| 45 |
+
- callhome
|
| 46 |
+
- msdwild
|
| 47 |
+
- voxconverse
|
| 48 |
+
hf_split: train
|
| 49 |
+
shuffle_seed: 42
|
| 50 |
+
shuffle_buffer_size: 4
|
| 51 |
+
prefetch_factor: 4
|
| 52 |
+
persistent_workers: true
|
| 53 |
+
prefetch_rows: 8
|
| 54 |
+
use_lhotse: false
|
| 55 |
+
use_bucketing: true
|
| 56 |
+
num_buckets: 10
|
| 57 |
+
bucket_duration_bins:
|
| 58 |
+
- 10
|
| 59 |
+
- 20
|
| 60 |
+
- 30
|
| 61 |
+
- 40
|
| 62 |
+
- 50
|
| 63 |
+
- 60
|
| 64 |
+
- 70
|
| 65 |
+
- 80
|
| 66 |
+
- 90
|
| 67 |
+
pin_memory: true
|
| 68 |
+
min_duration: 10
|
| 69 |
+
max_duration: 90
|
| 70 |
+
batch_duration: 400
|
| 71 |
+
quadratic_duration: 1200
|
| 72 |
+
bucket_buffer_size: 20000
|
| 73 |
+
window_stride: 0.3125
|
| 74 |
+
subsampling_factor: 1
|
| 75 |
+
validation_ds:
|
| 76 |
+
manifest_filepath: null
|
| 77 |
+
is_tarred: false
|
| 78 |
+
tarred_audio_filepaths: null
|
| 79 |
+
sample_rate: ${model.sample_rate}
|
| 80 |
+
num_spks: ${model.max_num_of_spks}
|
| 81 |
+
session_len_sec: 45
|
| 82 |
+
shift_sec: 4
|
| 83 |
+
soft_label_thres: 0.5
|
| 84 |
+
soft_targets: false
|
| 85 |
+
labels: null
|
| 86 |
+
batch_size: ${batch_size}
|
| 87 |
+
shuffle: false
|
| 88 |
+
num_workers: ${num_workers}
|
| 89 |
+
validation_mode: true
|
| 90 |
+
use_hf_streaming: true
|
| 91 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 92 |
+
hf_configs:
|
| 93 |
+
- alm_benchmark
|
| 94 |
+
hf_split: train
|
| 95 |
+
shuffle_seed: 42
|
| 96 |
+
shuffle_buffer_size: 100
|
| 97 |
+
prefetch_factor: 4
|
| 98 |
+
persistent_workers: true
|
| 99 |
+
prefetch_rows: 4
|
| 100 |
+
use_lhotse: false
|
| 101 |
+
use_bucketing: false
|
| 102 |
+
drop_last: false
|
| 103 |
+
pin_memory: true
|
| 104 |
+
window_stride: 0.3125
|
| 105 |
+
subsampling_factor: 1
|
| 106 |
+
test_ds:
|
| 107 |
+
manifest_filepath: null
|
| 108 |
+
is_tarred: false
|
| 109 |
+
tarred_audio_filepaths: null
|
| 110 |
+
sample_rate: ${model.sample_rate}
|
| 111 |
+
num_spks: ${model.max_num_of_spks}
|
| 112 |
+
session_len_sec: 90
|
| 113 |
+
soft_label_thres: 0.5
|
| 114 |
+
soft_targets: false
|
| 115 |
+
labels: null
|
| 116 |
+
batch_size: ${batch_size}
|
| 117 |
+
shuffle: false
|
| 118 |
+
seq_eval_mode: true
|
| 119 |
+
num_workers: ${num_workers}
|
| 120 |
+
validation_mode: true
|
| 121 |
+
use_lhotse: false
|
| 122 |
+
use_bucketing: false
|
| 123 |
+
drop_last: false
|
| 124 |
+
pin_memory: true
|
| 125 |
+
window_stride: 0.3125
|
| 126 |
+
subsampling_factor: 1
|
| 127 |
+
sortformer_modules:
|
| 128 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 129 |
+
num_spks: ${model.max_num_of_spks}
|
| 130 |
+
dropout_rate: 0.1
|
| 131 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 132 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
encoder:
|
| 135 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 136 |
+
subsampling_factor: 1
|
| 137 |
+
transformer_encoder:
|
| 138 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 139 |
+
num_layers: 18
|
| 140 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 141 |
+
inner_size: 768
|
| 142 |
+
num_attention_heads: 8
|
| 143 |
+
attn_score_dropout: 0.5
|
| 144 |
+
attn_layer_dropout: 0.5
|
| 145 |
+
ffn_dropout: 0.5
|
| 146 |
+
hidden_act: relu
|
| 147 |
+
pre_ln: false
|
| 148 |
+
pre_ln_final_layer_norm: true
|
| 149 |
+
loss:
|
| 150 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 151 |
+
weight: null
|
| 152 |
+
reduction: mean
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
optim:
|
| 155 |
+
name: adamw
|
| 156 |
+
lr: ${model.lr}
|
| 157 |
+
betas:
|
| 158 |
+
- 0.9
|
| 159 |
+
- 0.98
|
| 160 |
+
weight_decay: 0.001
|
| 161 |
+
sched:
|
| 162 |
+
name: InverseSquareRootAnnealing
|
| 163 |
+
warmup_steps: 2500
|
| 164 |
+
warmup_ratio: null
|
| 165 |
+
min_lr: 1.0e-06
|
| 166 |
+
trainer:
|
| 167 |
+
devices: 8
|
| 168 |
+
accelerator: gpu
|
| 169 |
+
precision: bf16-mixed
|
| 170 |
+
max_epochs: -1
|
| 171 |
+
max_steps: 16000
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
strategy: ddp_find_unused_parameters_true
|
| 174 |
+
accumulate_grad_batches: 1
|
| 175 |
+
deterministic: false
|
| 176 |
+
enable_checkpointing: false
|
| 177 |
+
logger: false
|
| 178 |
+
log_every_n_steps: 1
|
| 179 |
+
val_check_interval: 2000
|
| 180 |
+
num_sanity_val_steps: 0
|
| 181 |
+
exp_manager:
|
| 182 |
+
use_datetime_version: false
|
| 183 |
+
exp_dir: ./openflam_sortformer_train
|
| 184 |
+
name: openflam_sortformer_train
|
| 185 |
+
resume_if_exists: true
|
| 186 |
+
resume_from_checkpoint: null
|
| 187 |
+
resume_ignore_no_checkpoint: true
|
| 188 |
+
create_tensorboard_logger: true
|
| 189 |
+
create_checkpoint_callback: true
|
| 190 |
+
create_wandb_logger: false
|
| 191 |
+
checkpoint_callback_params:
|
| 192 |
+
monitor: val_der
|
| 193 |
+
mode: min
|
| 194 |
+
save_top_k: 5
|
| 195 |
+
every_n_train_steps: 2000
|
| 196 |
+
every_n_epochs: 0
|
| 197 |
+
wandb_logger_kwargs:
|
| 198 |
+
resume: true
|
| 199 |
+
name: null
|
| 200 |
+
project: null
|
| 201 |
+
|
| 202 |
+
[NeMo W 2026-04-13 18:49:23 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 203 |
+
[NeMo W 2026-04-13 18:49:23 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 204 |
+
[NeMo I 2026-04-13 18:49:23 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
|
| 205 |
+
[NeMo I 2026-04-13 18:49:23 exp_manager:1262] TensorboardLogger has been set up
|
| 206 |
+
[NeMo W 2026-04-13 18:49:23 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 207 |
+
[NeMo I 2026-04-13 18:49:23 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 208 |
+
[NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 210 |
+
[NeMo W 2026-04-13 18:49:28 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 211 |
+
[NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
|
| 212 |
+
[NeMo I 2026-04-13 18:49:42 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
|
| 213 |
+
[NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
|
| 214 |
+
Parameter Group 0
|
| 215 |
+
amsgrad: False
|
| 216 |
+
betas: (0.9, 0.98)
|
| 217 |
+
capturable: False
|
| 218 |
+
decoupled_weight_decay: True
|
| 219 |
+
differentiable: False
|
| 220 |
+
eps: 1e-08
|
| 221 |
+
foreach: None
|
| 222 |
+
fused: None
|
| 223 |
+
lr: 2e-05
|
| 224 |
+
maximize: False
|
| 225 |
+
weight_decay: 0.001
|
| 226 |
+
)
|
| 227 |
+
[NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f9f5130a780>"
|
| 228 |
+
will be used during training (effective maximum steps = 16000) -
|
| 229 |
+
Parameters :
|
| 230 |
+
(warmup_steps: 2500
|
| 231 |
+
warmup_ratio: null
|
| 232 |
+
min_lr: 1.0e-06
|
| 233 |
+
max_steps: 16000
|
| 234 |
+
)
|
| 235 |
+
[NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453328.
|
| 236 |
+
[NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900246.
|
| 237 |
+
[NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641915.
|
| 238 |
+
[NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533032.
|
| 239 |
+
[NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908696.
|
| 240 |
+
[NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0131009.
|
| 241 |
+
[NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344163.
|
| 242 |
+
[NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732656.
|
| 243 |
+
[NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425637.
|
| 244 |
+
[NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947732.
|
| 245 |
+
[NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5715966.
|
openflam_sortformer_train/nemo_log_globalrank-2_localrank-2.txt
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
openflam_model_name: v1-base
|
| 10 |
+
openflam_pretrained: true
|
| 11 |
+
openflam_freeze_layers: 0
|
| 12 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 13 |
+
rttm_unit_10ms_frame_count: 31
|
| 14 |
+
der_collar: 0.25
|
| 15 |
+
der_ignore_overlap: true
|
| 16 |
+
model_defaults:
|
| 17 |
+
fc_d_model: 1024
|
| 18 |
+
tf_d_model: 192
|
| 19 |
+
train_ds:
|
| 20 |
+
manifest_filepath: null
|
| 21 |
+
sample_rate: ${model.sample_rate}
|
| 22 |
+
num_spks: ${model.max_num_of_spks}
|
| 23 |
+
session_len_sec: 45
|
| 24 |
+
shift_sec: 4
|
| 25 |
+
soft_label_thres: 0.5
|
| 26 |
+
soft_targets: false
|
| 27 |
+
labels: null
|
| 28 |
+
batch_size: ${batch_size}
|
| 29 |
+
shuffle: true
|
| 30 |
+
num_workers: ${num_workers}
|
| 31 |
+
validation_mode: false
|
| 32 |
+
use_hf_streaming: true
|
| 33 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 34 |
+
hf_configs:
|
| 35 |
+
- CHiME6
|
| 36 |
+
- Dipco
|
| 37 |
+
- ICSI
|
| 38 |
+
- M3SD
|
| 39 |
+
- NOTSOFAR
|
| 40 |
+
- aishell4
|
| 41 |
+
- aishell5
|
| 42 |
+
- alimeeting
|
| 43 |
+
- ami_ihm
|
| 44 |
+
- ami_sdm
|
| 45 |
+
- callhome
|
| 46 |
+
- msdwild
|
| 47 |
+
- voxconverse
|
| 48 |
+
hf_split: train
|
| 49 |
+
shuffle_seed: 42
|
| 50 |
+
shuffle_buffer_size: 4
|
| 51 |
+
prefetch_factor: 4
|
| 52 |
+
persistent_workers: true
|
| 53 |
+
prefetch_rows: 8
|
| 54 |
+
use_lhotse: false
|
| 55 |
+
use_bucketing: true
|
| 56 |
+
num_buckets: 10
|
| 57 |
+
bucket_duration_bins:
|
| 58 |
+
- 10
|
| 59 |
+
- 20
|
| 60 |
+
- 30
|
| 61 |
+
- 40
|
| 62 |
+
- 50
|
| 63 |
+
- 60
|
| 64 |
+
- 70
|
| 65 |
+
- 80
|
| 66 |
+
- 90
|
| 67 |
+
pin_memory: true
|
| 68 |
+
min_duration: 10
|
| 69 |
+
max_duration: 90
|
| 70 |
+
batch_duration: 400
|
| 71 |
+
quadratic_duration: 1200
|
| 72 |
+
bucket_buffer_size: 20000
|
| 73 |
+
window_stride: 0.3125
|
| 74 |
+
subsampling_factor: 1
|
| 75 |
+
validation_ds:
|
| 76 |
+
manifest_filepath: null
|
| 77 |
+
is_tarred: false
|
| 78 |
+
tarred_audio_filepaths: null
|
| 79 |
+
sample_rate: ${model.sample_rate}
|
| 80 |
+
num_spks: ${model.max_num_of_spks}
|
| 81 |
+
session_len_sec: 45
|
| 82 |
+
shift_sec: 4
|
| 83 |
+
soft_label_thres: 0.5
|
| 84 |
+
soft_targets: false
|
| 85 |
+
labels: null
|
| 86 |
+
batch_size: ${batch_size}
|
| 87 |
+
shuffle: false
|
| 88 |
+
num_workers: ${num_workers}
|
| 89 |
+
validation_mode: true
|
| 90 |
+
use_hf_streaming: true
|
| 91 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 92 |
+
hf_configs:
|
| 93 |
+
- alm_benchmark
|
| 94 |
+
hf_split: train
|
| 95 |
+
shuffle_seed: 42
|
| 96 |
+
shuffle_buffer_size: 100
|
| 97 |
+
prefetch_factor: 4
|
| 98 |
+
persistent_workers: true
|
| 99 |
+
prefetch_rows: 4
|
| 100 |
+
use_lhotse: false
|
| 101 |
+
use_bucketing: false
|
| 102 |
+
drop_last: false
|
| 103 |
+
pin_memory: true
|
| 104 |
+
window_stride: 0.3125
|
| 105 |
+
subsampling_factor: 1
|
| 106 |
+
test_ds:
|
| 107 |
+
manifest_filepath: null
|
| 108 |
+
is_tarred: false
|
| 109 |
+
tarred_audio_filepaths: null
|
| 110 |
+
sample_rate: ${model.sample_rate}
|
| 111 |
+
num_spks: ${model.max_num_of_spks}
|
| 112 |
+
session_len_sec: 90
|
| 113 |
+
soft_label_thres: 0.5
|
| 114 |
+
soft_targets: false
|
| 115 |
+
labels: null
|
| 116 |
+
batch_size: ${batch_size}
|
| 117 |
+
shuffle: false
|
| 118 |
+
seq_eval_mode: true
|
| 119 |
+
num_workers: ${num_workers}
|
| 120 |
+
validation_mode: true
|
| 121 |
+
use_lhotse: false
|
| 122 |
+
use_bucketing: false
|
| 123 |
+
drop_last: false
|
| 124 |
+
pin_memory: true
|
| 125 |
+
window_stride: 0.3125
|
| 126 |
+
subsampling_factor: 1
|
| 127 |
+
sortformer_modules:
|
| 128 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 129 |
+
num_spks: ${model.max_num_of_spks}
|
| 130 |
+
dropout_rate: 0.1
|
| 131 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 132 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
encoder:
|
| 135 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 136 |
+
subsampling_factor: 1
|
| 137 |
+
transformer_encoder:
|
| 138 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 139 |
+
num_layers: 18
|
| 140 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 141 |
+
inner_size: 768
|
| 142 |
+
num_attention_heads: 8
|
| 143 |
+
attn_score_dropout: 0.5
|
| 144 |
+
attn_layer_dropout: 0.5
|
| 145 |
+
ffn_dropout: 0.5
|
| 146 |
+
hidden_act: relu
|
| 147 |
+
pre_ln: false
|
| 148 |
+
pre_ln_final_layer_norm: true
|
| 149 |
+
loss:
|
| 150 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 151 |
+
weight: null
|
| 152 |
+
reduction: mean
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
optim:
|
| 155 |
+
name: adamw
|
| 156 |
+
lr: ${model.lr}
|
| 157 |
+
betas:
|
| 158 |
+
- 0.9
|
| 159 |
+
- 0.98
|
| 160 |
+
weight_decay: 0.001
|
| 161 |
+
sched:
|
| 162 |
+
name: InverseSquareRootAnnealing
|
| 163 |
+
warmup_steps: 2500
|
| 164 |
+
warmup_ratio: null
|
| 165 |
+
min_lr: 1.0e-06
|
| 166 |
+
trainer:
|
| 167 |
+
devices: 8
|
| 168 |
+
accelerator: gpu
|
| 169 |
+
precision: bf16-mixed
|
| 170 |
+
max_epochs: -1
|
| 171 |
+
max_steps: 16000
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
strategy: ddp_find_unused_parameters_true
|
| 174 |
+
accumulate_grad_batches: 1
|
| 175 |
+
deterministic: false
|
| 176 |
+
enable_checkpointing: false
|
| 177 |
+
logger: false
|
| 178 |
+
log_every_n_steps: 1
|
| 179 |
+
val_check_interval: 2000
|
| 180 |
+
num_sanity_val_steps: 0
|
| 181 |
+
exp_manager:
|
| 182 |
+
use_datetime_version: false
|
| 183 |
+
exp_dir: ./openflam_sortformer_train
|
| 184 |
+
name: openflam_sortformer_train
|
| 185 |
+
resume_if_exists: true
|
| 186 |
+
resume_from_checkpoint: null
|
| 187 |
+
resume_ignore_no_checkpoint: true
|
| 188 |
+
create_tensorboard_logger: true
|
| 189 |
+
create_checkpoint_callback: true
|
| 190 |
+
create_wandb_logger: false
|
| 191 |
+
checkpoint_callback_params:
|
| 192 |
+
monitor: val_der
|
| 193 |
+
mode: min
|
| 194 |
+
save_top_k: 5
|
| 195 |
+
every_n_train_steps: 2000
|
| 196 |
+
every_n_epochs: 0
|
| 197 |
+
wandb_logger_kwargs:
|
| 198 |
+
resume: true
|
| 199 |
+
name: null
|
| 200 |
+
project: null
|
| 201 |
+
|
| 202 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 203 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 204 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
|
| 205 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
|
| 206 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 207 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 208 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 210 |
+
[NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 211 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
|
| 212 |
+
[NeMo I 2026-04-13 18:49:42 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
|
| 213 |
+
[NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
|
| 214 |
+
Parameter Group 0
|
| 215 |
+
amsgrad: False
|
| 216 |
+
betas: (0.9, 0.98)
|
| 217 |
+
capturable: False
|
| 218 |
+
decoupled_weight_decay: True
|
| 219 |
+
differentiable: False
|
| 220 |
+
eps: 1e-08
|
| 221 |
+
foreach: None
|
| 222 |
+
fused: None
|
| 223 |
+
lr: 2e-05
|
| 224 |
+
maximize: False
|
| 225 |
+
weight_decay: 0.001
|
| 226 |
+
)
|
| 227 |
+
[NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7fadd270ede0>"
|
| 228 |
+
will be used during training (effective maximum steps = 16000) -
|
| 229 |
+
Parameters :
|
| 230 |
+
(warmup_steps: 2500
|
| 231 |
+
warmup_ratio: null
|
| 232 |
+
min_lr: 1.0e-06
|
| 233 |
+
max_steps: 16000
|
| 234 |
+
)
|
| 235 |
+
[NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.445336.
|
| 236 |
+
[NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900196.
|
| 237 |
+
[NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641746.
|
| 238 |
+
[NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533016.
|
| 239 |
+
[NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908696.
|
| 240 |
+
[NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.013098.
|
| 241 |
+
[NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344163.
|
| 242 |
+
[NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.073263.
|
| 243 |
+
[NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425504.
|
| 244 |
+
[NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947706.
|
| 245 |
+
[NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.57159.
|
openflam_sortformer_train/nemo_log_globalrank-3_localrank-3.txt
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
openflam_model_name: v1-base
|
| 10 |
+
openflam_pretrained: true
|
| 11 |
+
openflam_freeze_layers: 0
|
| 12 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 13 |
+
rttm_unit_10ms_frame_count: 31
|
| 14 |
+
der_collar: 0.25
|
| 15 |
+
der_ignore_overlap: true
|
| 16 |
+
model_defaults:
|
| 17 |
+
fc_d_model: 1024
|
| 18 |
+
tf_d_model: 192
|
| 19 |
+
train_ds:
|
| 20 |
+
manifest_filepath: null
|
| 21 |
+
sample_rate: ${model.sample_rate}
|
| 22 |
+
num_spks: ${model.max_num_of_spks}
|
| 23 |
+
session_len_sec: 45
|
| 24 |
+
shift_sec: 4
|
| 25 |
+
soft_label_thres: 0.5
|
| 26 |
+
soft_targets: false
|
| 27 |
+
labels: null
|
| 28 |
+
batch_size: ${batch_size}
|
| 29 |
+
shuffle: true
|
| 30 |
+
num_workers: ${num_workers}
|
| 31 |
+
validation_mode: false
|
| 32 |
+
use_hf_streaming: true
|
| 33 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 34 |
+
hf_configs:
|
| 35 |
+
- CHiME6
|
| 36 |
+
- Dipco
|
| 37 |
+
- ICSI
|
| 38 |
+
- M3SD
|
| 39 |
+
- NOTSOFAR
|
| 40 |
+
- aishell4
|
| 41 |
+
- aishell5
|
| 42 |
+
- alimeeting
|
| 43 |
+
- ami_ihm
|
| 44 |
+
- ami_sdm
|
| 45 |
+
- callhome
|
| 46 |
+
- msdwild
|
| 47 |
+
- voxconverse
|
| 48 |
+
hf_split: train
|
| 49 |
+
shuffle_seed: 42
|
| 50 |
+
shuffle_buffer_size: 4
|
| 51 |
+
prefetch_factor: 4
|
| 52 |
+
persistent_workers: true
|
| 53 |
+
prefetch_rows: 8
|
| 54 |
+
use_lhotse: false
|
| 55 |
+
use_bucketing: true
|
| 56 |
+
num_buckets: 10
|
| 57 |
+
bucket_duration_bins:
|
| 58 |
+
- 10
|
| 59 |
+
- 20
|
| 60 |
+
- 30
|
| 61 |
+
- 40
|
| 62 |
+
- 50
|
| 63 |
+
- 60
|
| 64 |
+
- 70
|
| 65 |
+
- 80
|
| 66 |
+
- 90
|
| 67 |
+
pin_memory: true
|
| 68 |
+
min_duration: 10
|
| 69 |
+
max_duration: 90
|
| 70 |
+
batch_duration: 400
|
| 71 |
+
quadratic_duration: 1200
|
| 72 |
+
bucket_buffer_size: 20000
|
| 73 |
+
window_stride: 0.3125
|
| 74 |
+
subsampling_factor: 1
|
| 75 |
+
validation_ds:
|
| 76 |
+
manifest_filepath: null
|
| 77 |
+
is_tarred: false
|
| 78 |
+
tarred_audio_filepaths: null
|
| 79 |
+
sample_rate: ${model.sample_rate}
|
| 80 |
+
num_spks: ${model.max_num_of_spks}
|
| 81 |
+
session_len_sec: 45
|
| 82 |
+
shift_sec: 4
|
| 83 |
+
soft_label_thres: 0.5
|
| 84 |
+
soft_targets: false
|
| 85 |
+
labels: null
|
| 86 |
+
batch_size: ${batch_size}
|
| 87 |
+
shuffle: false
|
| 88 |
+
num_workers: ${num_workers}
|
| 89 |
+
validation_mode: true
|
| 90 |
+
use_hf_streaming: true
|
| 91 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 92 |
+
hf_configs:
|
| 93 |
+
- alm_benchmark
|
| 94 |
+
hf_split: train
|
| 95 |
+
shuffle_seed: 42
|
| 96 |
+
shuffle_buffer_size: 100
|
| 97 |
+
prefetch_factor: 4
|
| 98 |
+
persistent_workers: true
|
| 99 |
+
prefetch_rows: 4
|
| 100 |
+
use_lhotse: false
|
| 101 |
+
use_bucketing: false
|
| 102 |
+
drop_last: false
|
| 103 |
+
pin_memory: true
|
| 104 |
+
window_stride: 0.3125
|
| 105 |
+
subsampling_factor: 1
|
| 106 |
+
test_ds:
|
| 107 |
+
manifest_filepath: null
|
| 108 |
+
is_tarred: false
|
| 109 |
+
tarred_audio_filepaths: null
|
| 110 |
+
sample_rate: ${model.sample_rate}
|
| 111 |
+
num_spks: ${model.max_num_of_spks}
|
| 112 |
+
session_len_sec: 90
|
| 113 |
+
soft_label_thres: 0.5
|
| 114 |
+
soft_targets: false
|
| 115 |
+
labels: null
|
| 116 |
+
batch_size: ${batch_size}
|
| 117 |
+
shuffle: false
|
| 118 |
+
seq_eval_mode: true
|
| 119 |
+
num_workers: ${num_workers}
|
| 120 |
+
validation_mode: true
|
| 121 |
+
use_lhotse: false
|
| 122 |
+
use_bucketing: false
|
| 123 |
+
drop_last: false
|
| 124 |
+
pin_memory: true
|
| 125 |
+
window_stride: 0.3125
|
| 126 |
+
subsampling_factor: 1
|
| 127 |
+
sortformer_modules:
|
| 128 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 129 |
+
num_spks: ${model.max_num_of_spks}
|
| 130 |
+
dropout_rate: 0.1
|
| 131 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 132 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
encoder:
|
| 135 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 136 |
+
subsampling_factor: 1
|
| 137 |
+
transformer_encoder:
|
| 138 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 139 |
+
num_layers: 18
|
| 140 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 141 |
+
inner_size: 768
|
| 142 |
+
num_attention_heads: 8
|
| 143 |
+
attn_score_dropout: 0.5
|
| 144 |
+
attn_layer_dropout: 0.5
|
| 145 |
+
ffn_dropout: 0.5
|
| 146 |
+
hidden_act: relu
|
| 147 |
+
pre_ln: false
|
| 148 |
+
pre_ln_final_layer_norm: true
|
| 149 |
+
loss:
|
| 150 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 151 |
+
weight: null
|
| 152 |
+
reduction: mean
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
optim:
|
| 155 |
+
name: adamw
|
| 156 |
+
lr: ${model.lr}
|
| 157 |
+
betas:
|
| 158 |
+
- 0.9
|
| 159 |
+
- 0.98
|
| 160 |
+
weight_decay: 0.001
|
| 161 |
+
sched:
|
| 162 |
+
name: InverseSquareRootAnnealing
|
| 163 |
+
warmup_steps: 2500
|
| 164 |
+
warmup_ratio: null
|
| 165 |
+
min_lr: 1.0e-06
|
| 166 |
+
trainer:
|
| 167 |
+
devices: 8
|
| 168 |
+
accelerator: gpu
|
| 169 |
+
precision: bf16-mixed
|
| 170 |
+
max_epochs: -1
|
| 171 |
+
max_steps: 16000
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
strategy: ddp_find_unused_parameters_true
|
| 174 |
+
accumulate_grad_batches: 1
|
| 175 |
+
deterministic: false
|
| 176 |
+
enable_checkpointing: false
|
| 177 |
+
logger: false
|
| 178 |
+
log_every_n_steps: 1
|
| 179 |
+
val_check_interval: 2000
|
| 180 |
+
num_sanity_val_steps: 0
|
| 181 |
+
exp_manager:
|
| 182 |
+
use_datetime_version: false
|
| 183 |
+
exp_dir: ./openflam_sortformer_train
|
| 184 |
+
name: openflam_sortformer_train
|
| 185 |
+
resume_if_exists: true
|
| 186 |
+
resume_from_checkpoint: null
|
| 187 |
+
resume_ignore_no_checkpoint: true
|
| 188 |
+
create_tensorboard_logger: true
|
| 189 |
+
create_checkpoint_callback: true
|
| 190 |
+
create_wandb_logger: false
|
| 191 |
+
checkpoint_callback_params:
|
| 192 |
+
monitor: val_der
|
| 193 |
+
mode: min
|
| 194 |
+
save_top_k: 5
|
| 195 |
+
every_n_train_steps: 2000
|
| 196 |
+
every_n_epochs: 0
|
| 197 |
+
wandb_logger_kwargs:
|
| 198 |
+
resume: true
|
| 199 |
+
name: null
|
| 200 |
+
project: null
|
| 201 |
+
|
| 202 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 203 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 204 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
|
| 205 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
|
| 206 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 207 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 208 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 210 |
+
[NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 211 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
|
| 212 |
+
[NeMo I 2026-04-13 18:49:40 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
|
| 213 |
+
[NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
|
| 214 |
+
Parameter Group 0
|
| 215 |
+
amsgrad: False
|
| 216 |
+
betas: (0.9, 0.98)
|
| 217 |
+
capturable: False
|
| 218 |
+
decoupled_weight_decay: True
|
| 219 |
+
differentiable: False
|
| 220 |
+
eps: 1e-08
|
| 221 |
+
foreach: None
|
| 222 |
+
fused: None
|
| 223 |
+
lr: 2e-05
|
| 224 |
+
maximize: False
|
| 225 |
+
weight_decay: 0.001
|
| 226 |
+
)
|
| 227 |
+
[NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f2ab4b2f1d0>"
|
| 228 |
+
will be used during training (effective maximum steps = 16000) -
|
| 229 |
+
Parameters :
|
| 230 |
+
(warmup_steps: 2500
|
| 231 |
+
warmup_ratio: null
|
| 232 |
+
min_lr: 1.0e-06
|
| 233 |
+
max_steps: 16000
|
| 234 |
+
)
|
| 235 |
+
[NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.445329.
|
| 236 |
+
[NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900208.
|
| 237 |
+
[NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641736.
|
| 238 |
+
[NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533004.
|
| 239 |
+
[NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908863.
|
| 240 |
+
[NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0130968.
|
| 241 |
+
[NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344135.
|
| 242 |
+
[NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732584.
|
| 243 |
+
[NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425501.
|
| 244 |
+
[NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.89477.
|
| 245 |
+
[NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5715883.
|
openflam_sortformer_train/nemo_log_globalrank-4_localrank-4.txt
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 18:49:23 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
openflam_model_name: v1-base
|
| 10 |
+
openflam_pretrained: true
|
| 11 |
+
openflam_freeze_layers: 0
|
| 12 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 13 |
+
rttm_unit_10ms_frame_count: 31
|
| 14 |
+
der_collar: 0.25
|
| 15 |
+
der_ignore_overlap: true
|
| 16 |
+
model_defaults:
|
| 17 |
+
fc_d_model: 1024
|
| 18 |
+
tf_d_model: 192
|
| 19 |
+
train_ds:
|
| 20 |
+
manifest_filepath: null
|
| 21 |
+
sample_rate: ${model.sample_rate}
|
| 22 |
+
num_spks: ${model.max_num_of_spks}
|
| 23 |
+
session_len_sec: 45
|
| 24 |
+
shift_sec: 4
|
| 25 |
+
soft_label_thres: 0.5
|
| 26 |
+
soft_targets: false
|
| 27 |
+
labels: null
|
| 28 |
+
batch_size: ${batch_size}
|
| 29 |
+
shuffle: true
|
| 30 |
+
num_workers: ${num_workers}
|
| 31 |
+
validation_mode: false
|
| 32 |
+
use_hf_streaming: true
|
| 33 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 34 |
+
hf_configs:
|
| 35 |
+
- CHiME6
|
| 36 |
+
- Dipco
|
| 37 |
+
- ICSI
|
| 38 |
+
- M3SD
|
| 39 |
+
- NOTSOFAR
|
| 40 |
+
- aishell4
|
| 41 |
+
- aishell5
|
| 42 |
+
- alimeeting
|
| 43 |
+
- ami_ihm
|
| 44 |
+
- ami_sdm
|
| 45 |
+
- callhome
|
| 46 |
+
- msdwild
|
| 47 |
+
- voxconverse
|
| 48 |
+
hf_split: train
|
| 49 |
+
shuffle_seed: 42
|
| 50 |
+
shuffle_buffer_size: 4
|
| 51 |
+
prefetch_factor: 4
|
| 52 |
+
persistent_workers: true
|
| 53 |
+
prefetch_rows: 8
|
| 54 |
+
use_lhotse: false
|
| 55 |
+
use_bucketing: true
|
| 56 |
+
num_buckets: 10
|
| 57 |
+
bucket_duration_bins:
|
| 58 |
+
- 10
|
| 59 |
+
- 20
|
| 60 |
+
- 30
|
| 61 |
+
- 40
|
| 62 |
+
- 50
|
| 63 |
+
- 60
|
| 64 |
+
- 70
|
| 65 |
+
- 80
|
| 66 |
+
- 90
|
| 67 |
+
pin_memory: true
|
| 68 |
+
min_duration: 10
|
| 69 |
+
max_duration: 90
|
| 70 |
+
batch_duration: 400
|
| 71 |
+
quadratic_duration: 1200
|
| 72 |
+
bucket_buffer_size: 20000
|
| 73 |
+
window_stride: 0.3125
|
| 74 |
+
subsampling_factor: 1
|
| 75 |
+
validation_ds:
|
| 76 |
+
manifest_filepath: null
|
| 77 |
+
is_tarred: false
|
| 78 |
+
tarred_audio_filepaths: null
|
| 79 |
+
sample_rate: ${model.sample_rate}
|
| 80 |
+
num_spks: ${model.max_num_of_spks}
|
| 81 |
+
session_len_sec: 45
|
| 82 |
+
shift_sec: 4
|
| 83 |
+
soft_label_thres: 0.5
|
| 84 |
+
soft_targets: false
|
| 85 |
+
labels: null
|
| 86 |
+
batch_size: ${batch_size}
|
| 87 |
+
shuffle: false
|
| 88 |
+
num_workers: ${num_workers}
|
| 89 |
+
validation_mode: true
|
| 90 |
+
use_hf_streaming: true
|
| 91 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 92 |
+
hf_configs:
|
| 93 |
+
- alm_benchmark
|
| 94 |
+
hf_split: train
|
| 95 |
+
shuffle_seed: 42
|
| 96 |
+
shuffle_buffer_size: 100
|
| 97 |
+
prefetch_factor: 4
|
| 98 |
+
persistent_workers: true
|
| 99 |
+
prefetch_rows: 4
|
| 100 |
+
use_lhotse: false
|
| 101 |
+
use_bucketing: false
|
| 102 |
+
drop_last: false
|
| 103 |
+
pin_memory: true
|
| 104 |
+
window_stride: 0.3125
|
| 105 |
+
subsampling_factor: 1
|
| 106 |
+
test_ds:
|
| 107 |
+
manifest_filepath: null
|
| 108 |
+
is_tarred: false
|
| 109 |
+
tarred_audio_filepaths: null
|
| 110 |
+
sample_rate: ${model.sample_rate}
|
| 111 |
+
num_spks: ${model.max_num_of_spks}
|
| 112 |
+
session_len_sec: 90
|
| 113 |
+
soft_label_thres: 0.5
|
| 114 |
+
soft_targets: false
|
| 115 |
+
labels: null
|
| 116 |
+
batch_size: ${batch_size}
|
| 117 |
+
shuffle: false
|
| 118 |
+
seq_eval_mode: true
|
| 119 |
+
num_workers: ${num_workers}
|
| 120 |
+
validation_mode: true
|
| 121 |
+
use_lhotse: false
|
| 122 |
+
use_bucketing: false
|
| 123 |
+
drop_last: false
|
| 124 |
+
pin_memory: true
|
| 125 |
+
window_stride: 0.3125
|
| 126 |
+
subsampling_factor: 1
|
| 127 |
+
sortformer_modules:
|
| 128 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 129 |
+
num_spks: ${model.max_num_of_spks}
|
| 130 |
+
dropout_rate: 0.1
|
| 131 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 132 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
encoder:
|
| 135 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 136 |
+
subsampling_factor: 1
|
| 137 |
+
transformer_encoder:
|
| 138 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 139 |
+
num_layers: 18
|
| 140 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 141 |
+
inner_size: 768
|
| 142 |
+
num_attention_heads: 8
|
| 143 |
+
attn_score_dropout: 0.5
|
| 144 |
+
attn_layer_dropout: 0.5
|
| 145 |
+
ffn_dropout: 0.5
|
| 146 |
+
hidden_act: relu
|
| 147 |
+
pre_ln: false
|
| 148 |
+
pre_ln_final_layer_norm: true
|
| 149 |
+
loss:
|
| 150 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 151 |
+
weight: null
|
| 152 |
+
reduction: mean
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
optim:
|
| 155 |
+
name: adamw
|
| 156 |
+
lr: ${model.lr}
|
| 157 |
+
betas:
|
| 158 |
+
- 0.9
|
| 159 |
+
- 0.98
|
| 160 |
+
weight_decay: 0.001
|
| 161 |
+
sched:
|
| 162 |
+
name: InverseSquareRootAnnealing
|
| 163 |
+
warmup_steps: 2500
|
| 164 |
+
warmup_ratio: null
|
| 165 |
+
min_lr: 1.0e-06
|
| 166 |
+
trainer:
|
| 167 |
+
devices: 8
|
| 168 |
+
accelerator: gpu
|
| 169 |
+
precision: bf16-mixed
|
| 170 |
+
max_epochs: -1
|
| 171 |
+
max_steps: 16000
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
strategy: ddp_find_unused_parameters_true
|
| 174 |
+
accumulate_grad_batches: 1
|
| 175 |
+
deterministic: false
|
| 176 |
+
enable_checkpointing: false
|
| 177 |
+
logger: false
|
| 178 |
+
log_every_n_steps: 1
|
| 179 |
+
val_check_interval: 2000
|
| 180 |
+
num_sanity_val_steps: 0
|
| 181 |
+
exp_manager:
|
| 182 |
+
use_datetime_version: false
|
| 183 |
+
exp_dir: ./openflam_sortformer_train
|
| 184 |
+
name: openflam_sortformer_train
|
| 185 |
+
resume_if_exists: true
|
| 186 |
+
resume_from_checkpoint: null
|
| 187 |
+
resume_ignore_no_checkpoint: true
|
| 188 |
+
create_tensorboard_logger: true
|
| 189 |
+
create_checkpoint_callback: true
|
| 190 |
+
create_wandb_logger: false
|
| 191 |
+
checkpoint_callback_params:
|
| 192 |
+
monitor: val_der
|
| 193 |
+
mode: min
|
| 194 |
+
save_top_k: 5
|
| 195 |
+
every_n_train_steps: 2000
|
| 196 |
+
every_n_epochs: 0
|
| 197 |
+
wandb_logger_kwargs:
|
| 198 |
+
resume: true
|
| 199 |
+
name: null
|
| 200 |
+
project: null
|
| 201 |
+
|
| 202 |
+
[NeMo W 2026-04-13 18:49:23 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 203 |
+
[NeMo W 2026-04-13 18:49:23 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 204 |
+
[NeMo I 2026-04-13 18:49:23 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
|
| 205 |
+
[NeMo I 2026-04-13 18:49:23 exp_manager:1262] TensorboardLogger has been set up
|
| 206 |
+
[NeMo W 2026-04-13 18:49:23 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 207 |
+
[NeMo I 2026-04-13 18:49:23 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 208 |
+
[NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 210 |
+
[NeMo W 2026-04-13 18:49:28 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 211 |
+
[NeMo I 2026-04-13 18:49:28 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
|
| 212 |
+
[NeMo I 2026-04-13 18:49:42 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
|
| 213 |
+
[NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
|
| 214 |
+
Parameter Group 0
|
| 215 |
+
amsgrad: False
|
| 216 |
+
betas: (0.9, 0.98)
|
| 217 |
+
capturable: False
|
| 218 |
+
decoupled_weight_decay: True
|
| 219 |
+
differentiable: False
|
| 220 |
+
eps: 1e-08
|
| 221 |
+
foreach: None
|
| 222 |
+
fused: None
|
| 223 |
+
lr: 2e-05
|
| 224 |
+
maximize: False
|
| 225 |
+
weight_decay: 0.001
|
| 226 |
+
)
|
| 227 |
+
[NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7fa4985c0560>"
|
| 228 |
+
will be used during training (effective maximum steps = 16000) -
|
| 229 |
+
Parameters :
|
| 230 |
+
(warmup_steps: 2500
|
| 231 |
+
warmup_ratio: null
|
| 232 |
+
min_lr: 1.0e-06
|
| 233 |
+
max_steps: 16000
|
| 234 |
+
)
|
| 235 |
+
[NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453347.
|
| 236 |
+
[NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900336.
|
| 237 |
+
[NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641777.
|
| 238 |
+
[NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533013.
|
| 239 |
+
[NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.190873.
|
| 240 |
+
[NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.013099.
|
| 241 |
+
[NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344116.
|
| 242 |
+
[NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.073265.
|
| 243 |
+
[NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425628.
|
| 244 |
+
[NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947723.
|
| 245 |
+
[NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.571588.
|
openflam_sortformer_train/nemo_log_globalrank-5_localrank-5.txt
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
openflam_model_name: v1-base
|
| 10 |
+
openflam_pretrained: true
|
| 11 |
+
openflam_freeze_layers: 0
|
| 12 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 13 |
+
rttm_unit_10ms_frame_count: 31
|
| 14 |
+
der_collar: 0.25
|
| 15 |
+
der_ignore_overlap: true
|
| 16 |
+
model_defaults:
|
| 17 |
+
fc_d_model: 1024
|
| 18 |
+
tf_d_model: 192
|
| 19 |
+
train_ds:
|
| 20 |
+
manifest_filepath: null
|
| 21 |
+
sample_rate: ${model.sample_rate}
|
| 22 |
+
num_spks: ${model.max_num_of_spks}
|
| 23 |
+
session_len_sec: 45
|
| 24 |
+
shift_sec: 4
|
| 25 |
+
soft_label_thres: 0.5
|
| 26 |
+
soft_targets: false
|
| 27 |
+
labels: null
|
| 28 |
+
batch_size: ${batch_size}
|
| 29 |
+
shuffle: true
|
| 30 |
+
num_workers: ${num_workers}
|
| 31 |
+
validation_mode: false
|
| 32 |
+
use_hf_streaming: true
|
| 33 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 34 |
+
hf_configs:
|
| 35 |
+
- CHiME6
|
| 36 |
+
- Dipco
|
| 37 |
+
- ICSI
|
| 38 |
+
- M3SD
|
| 39 |
+
- NOTSOFAR
|
| 40 |
+
- aishell4
|
| 41 |
+
- aishell5
|
| 42 |
+
- alimeeting
|
| 43 |
+
- ami_ihm
|
| 44 |
+
- ami_sdm
|
| 45 |
+
- callhome
|
| 46 |
+
- msdwild
|
| 47 |
+
- voxconverse
|
| 48 |
+
hf_split: train
|
| 49 |
+
shuffle_seed: 42
|
| 50 |
+
shuffle_buffer_size: 4
|
| 51 |
+
prefetch_factor: 4
|
| 52 |
+
persistent_workers: true
|
| 53 |
+
prefetch_rows: 8
|
| 54 |
+
use_lhotse: false
|
| 55 |
+
use_bucketing: true
|
| 56 |
+
num_buckets: 10
|
| 57 |
+
bucket_duration_bins:
|
| 58 |
+
- 10
|
| 59 |
+
- 20
|
| 60 |
+
- 30
|
| 61 |
+
- 40
|
| 62 |
+
- 50
|
| 63 |
+
- 60
|
| 64 |
+
- 70
|
| 65 |
+
- 80
|
| 66 |
+
- 90
|
| 67 |
+
pin_memory: true
|
| 68 |
+
min_duration: 10
|
| 69 |
+
max_duration: 90
|
| 70 |
+
batch_duration: 400
|
| 71 |
+
quadratic_duration: 1200
|
| 72 |
+
bucket_buffer_size: 20000
|
| 73 |
+
window_stride: 0.3125
|
| 74 |
+
subsampling_factor: 1
|
| 75 |
+
validation_ds:
|
| 76 |
+
manifest_filepath: null
|
| 77 |
+
is_tarred: false
|
| 78 |
+
tarred_audio_filepaths: null
|
| 79 |
+
sample_rate: ${model.sample_rate}
|
| 80 |
+
num_spks: ${model.max_num_of_spks}
|
| 81 |
+
session_len_sec: 45
|
| 82 |
+
shift_sec: 4
|
| 83 |
+
soft_label_thres: 0.5
|
| 84 |
+
soft_targets: false
|
| 85 |
+
labels: null
|
| 86 |
+
batch_size: ${batch_size}
|
| 87 |
+
shuffle: false
|
| 88 |
+
num_workers: ${num_workers}
|
| 89 |
+
validation_mode: true
|
| 90 |
+
use_hf_streaming: true
|
| 91 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 92 |
+
hf_configs:
|
| 93 |
+
- alm_benchmark
|
| 94 |
+
hf_split: train
|
| 95 |
+
shuffle_seed: 42
|
| 96 |
+
shuffle_buffer_size: 100
|
| 97 |
+
prefetch_factor: 4
|
| 98 |
+
persistent_workers: true
|
| 99 |
+
prefetch_rows: 4
|
| 100 |
+
use_lhotse: false
|
| 101 |
+
use_bucketing: false
|
| 102 |
+
drop_last: false
|
| 103 |
+
pin_memory: true
|
| 104 |
+
window_stride: 0.3125
|
| 105 |
+
subsampling_factor: 1
|
| 106 |
+
test_ds:
|
| 107 |
+
manifest_filepath: null
|
| 108 |
+
is_tarred: false
|
| 109 |
+
tarred_audio_filepaths: null
|
| 110 |
+
sample_rate: ${model.sample_rate}
|
| 111 |
+
num_spks: ${model.max_num_of_spks}
|
| 112 |
+
session_len_sec: 90
|
| 113 |
+
soft_label_thres: 0.5
|
| 114 |
+
soft_targets: false
|
| 115 |
+
labels: null
|
| 116 |
+
batch_size: ${batch_size}
|
| 117 |
+
shuffle: false
|
| 118 |
+
seq_eval_mode: true
|
| 119 |
+
num_workers: ${num_workers}
|
| 120 |
+
validation_mode: true
|
| 121 |
+
use_lhotse: false
|
| 122 |
+
use_bucketing: false
|
| 123 |
+
drop_last: false
|
| 124 |
+
pin_memory: true
|
| 125 |
+
window_stride: 0.3125
|
| 126 |
+
subsampling_factor: 1
|
| 127 |
+
sortformer_modules:
|
| 128 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 129 |
+
num_spks: ${model.max_num_of_spks}
|
| 130 |
+
dropout_rate: 0.1
|
| 131 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 132 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
encoder:
|
| 135 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 136 |
+
subsampling_factor: 1
|
| 137 |
+
transformer_encoder:
|
| 138 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 139 |
+
num_layers: 18
|
| 140 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 141 |
+
inner_size: 768
|
| 142 |
+
num_attention_heads: 8
|
| 143 |
+
attn_score_dropout: 0.5
|
| 144 |
+
attn_layer_dropout: 0.5
|
| 145 |
+
ffn_dropout: 0.5
|
| 146 |
+
hidden_act: relu
|
| 147 |
+
pre_ln: false
|
| 148 |
+
pre_ln_final_layer_norm: true
|
| 149 |
+
loss:
|
| 150 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 151 |
+
weight: null
|
| 152 |
+
reduction: mean
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
optim:
|
| 155 |
+
name: adamw
|
| 156 |
+
lr: ${model.lr}
|
| 157 |
+
betas:
|
| 158 |
+
- 0.9
|
| 159 |
+
- 0.98
|
| 160 |
+
weight_decay: 0.001
|
| 161 |
+
sched:
|
| 162 |
+
name: InverseSquareRootAnnealing
|
| 163 |
+
warmup_steps: 2500
|
| 164 |
+
warmup_ratio: null
|
| 165 |
+
min_lr: 1.0e-06
|
| 166 |
+
trainer:
|
| 167 |
+
devices: 8
|
| 168 |
+
accelerator: gpu
|
| 169 |
+
precision: bf16-mixed
|
| 170 |
+
max_epochs: -1
|
| 171 |
+
max_steps: 16000
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
strategy: ddp_find_unused_parameters_true
|
| 174 |
+
accumulate_grad_batches: 1
|
| 175 |
+
deterministic: false
|
| 176 |
+
enable_checkpointing: false
|
| 177 |
+
logger: false
|
| 178 |
+
log_every_n_steps: 1
|
| 179 |
+
val_check_interval: 2000
|
| 180 |
+
num_sanity_val_steps: 0
|
| 181 |
+
exp_manager:
|
| 182 |
+
use_datetime_version: false
|
| 183 |
+
exp_dir: ./openflam_sortformer_train
|
| 184 |
+
name: openflam_sortformer_train
|
| 185 |
+
resume_if_exists: true
|
| 186 |
+
resume_from_checkpoint: null
|
| 187 |
+
resume_ignore_no_checkpoint: true
|
| 188 |
+
create_tensorboard_logger: true
|
| 189 |
+
create_checkpoint_callback: true
|
| 190 |
+
create_wandb_logger: false
|
| 191 |
+
checkpoint_callback_params:
|
| 192 |
+
monitor: val_der
|
| 193 |
+
mode: min
|
| 194 |
+
save_top_k: 5
|
| 195 |
+
every_n_train_steps: 2000
|
| 196 |
+
every_n_epochs: 0
|
| 197 |
+
wandb_logger_kwargs:
|
| 198 |
+
resume: true
|
| 199 |
+
name: null
|
| 200 |
+
project: null
|
| 201 |
+
|
| 202 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 203 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 204 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
|
| 205 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
|
| 206 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 207 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 208 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 210 |
+
[NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 211 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
|
| 212 |
+
[NeMo I 2026-04-13 18:49:38 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
|
| 213 |
+
[NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
|
| 214 |
+
Parameter Group 0
|
| 215 |
+
amsgrad: False
|
| 216 |
+
betas: (0.9, 0.98)
|
| 217 |
+
capturable: False
|
| 218 |
+
decoupled_weight_decay: True
|
| 219 |
+
differentiable: False
|
| 220 |
+
eps: 1e-08
|
| 221 |
+
foreach: None
|
| 222 |
+
fused: None
|
| 223 |
+
lr: 2e-05
|
| 224 |
+
maximize: False
|
| 225 |
+
weight_decay: 0.001
|
| 226 |
+
)
|
| 227 |
+
[NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f1f5bc1b6e0>"
|
| 228 |
+
will be used during training (effective maximum steps = 16000) -
|
| 229 |
+
Parameters :
|
| 230 |
+
(warmup_steps: 2500
|
| 231 |
+
warmup_ratio: null
|
| 232 |
+
min_lr: 1.0e-06
|
| 233 |
+
max_steps: 16000
|
| 234 |
+
)
|
| 235 |
+
[NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453282.
|
| 236 |
+
[NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900138.
|
| 237 |
+
[NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.164175.
|
| 238 |
+
[NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533056.
|
| 239 |
+
[NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908715.
|
| 240 |
+
[NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0131006.
|
| 241 |
+
[NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344132.
|
| 242 |
+
[NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.073263.
|
| 243 |
+
[NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425544.
|
| 244 |
+
[NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947694.
|
| 245 |
+
[NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5715895.
|
openflam_sortformer_train/nemo_log_globalrank-6_localrank-6.txt
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
openflam_model_name: v1-base
|
| 10 |
+
openflam_pretrained: true
|
| 11 |
+
openflam_freeze_layers: 0
|
| 12 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 13 |
+
rttm_unit_10ms_frame_count: 31
|
| 14 |
+
der_collar: 0.25
|
| 15 |
+
der_ignore_overlap: true
|
| 16 |
+
model_defaults:
|
| 17 |
+
fc_d_model: 1024
|
| 18 |
+
tf_d_model: 192
|
| 19 |
+
train_ds:
|
| 20 |
+
manifest_filepath: null
|
| 21 |
+
sample_rate: ${model.sample_rate}
|
| 22 |
+
num_spks: ${model.max_num_of_spks}
|
| 23 |
+
session_len_sec: 45
|
| 24 |
+
shift_sec: 4
|
| 25 |
+
soft_label_thres: 0.5
|
| 26 |
+
soft_targets: false
|
| 27 |
+
labels: null
|
| 28 |
+
batch_size: ${batch_size}
|
| 29 |
+
shuffle: true
|
| 30 |
+
num_workers: ${num_workers}
|
| 31 |
+
validation_mode: false
|
| 32 |
+
use_hf_streaming: true
|
| 33 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 34 |
+
hf_configs:
|
| 35 |
+
- CHiME6
|
| 36 |
+
- Dipco
|
| 37 |
+
- ICSI
|
| 38 |
+
- M3SD
|
| 39 |
+
- NOTSOFAR
|
| 40 |
+
- aishell4
|
| 41 |
+
- aishell5
|
| 42 |
+
- alimeeting
|
| 43 |
+
- ami_ihm
|
| 44 |
+
- ami_sdm
|
| 45 |
+
- callhome
|
| 46 |
+
- msdwild
|
| 47 |
+
- voxconverse
|
| 48 |
+
hf_split: train
|
| 49 |
+
shuffle_seed: 42
|
| 50 |
+
shuffle_buffer_size: 4
|
| 51 |
+
prefetch_factor: 4
|
| 52 |
+
persistent_workers: true
|
| 53 |
+
prefetch_rows: 8
|
| 54 |
+
use_lhotse: false
|
| 55 |
+
use_bucketing: true
|
| 56 |
+
num_buckets: 10
|
| 57 |
+
bucket_duration_bins:
|
| 58 |
+
- 10
|
| 59 |
+
- 20
|
| 60 |
+
- 30
|
| 61 |
+
- 40
|
| 62 |
+
- 50
|
| 63 |
+
- 60
|
| 64 |
+
- 70
|
| 65 |
+
- 80
|
| 66 |
+
- 90
|
| 67 |
+
pin_memory: true
|
| 68 |
+
min_duration: 10
|
| 69 |
+
max_duration: 90
|
| 70 |
+
batch_duration: 400
|
| 71 |
+
quadratic_duration: 1200
|
| 72 |
+
bucket_buffer_size: 20000
|
| 73 |
+
window_stride: 0.3125
|
| 74 |
+
subsampling_factor: 1
|
| 75 |
+
validation_ds:
|
| 76 |
+
manifest_filepath: null
|
| 77 |
+
is_tarred: false
|
| 78 |
+
tarred_audio_filepaths: null
|
| 79 |
+
sample_rate: ${model.sample_rate}
|
| 80 |
+
num_spks: ${model.max_num_of_spks}
|
| 81 |
+
session_len_sec: 45
|
| 82 |
+
shift_sec: 4
|
| 83 |
+
soft_label_thres: 0.5
|
| 84 |
+
soft_targets: false
|
| 85 |
+
labels: null
|
| 86 |
+
batch_size: ${batch_size}
|
| 87 |
+
shuffle: false
|
| 88 |
+
num_workers: ${num_workers}
|
| 89 |
+
validation_mode: true
|
| 90 |
+
use_hf_streaming: true
|
| 91 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 92 |
+
hf_configs:
|
| 93 |
+
- alm_benchmark
|
| 94 |
+
hf_split: train
|
| 95 |
+
shuffle_seed: 42
|
| 96 |
+
shuffle_buffer_size: 100
|
| 97 |
+
prefetch_factor: 4
|
| 98 |
+
persistent_workers: true
|
| 99 |
+
prefetch_rows: 4
|
| 100 |
+
use_lhotse: false
|
| 101 |
+
use_bucketing: false
|
| 102 |
+
drop_last: false
|
| 103 |
+
pin_memory: true
|
| 104 |
+
window_stride: 0.3125
|
| 105 |
+
subsampling_factor: 1
|
| 106 |
+
test_ds:
|
| 107 |
+
manifest_filepath: null
|
| 108 |
+
is_tarred: false
|
| 109 |
+
tarred_audio_filepaths: null
|
| 110 |
+
sample_rate: ${model.sample_rate}
|
| 111 |
+
num_spks: ${model.max_num_of_spks}
|
| 112 |
+
session_len_sec: 90
|
| 113 |
+
soft_label_thres: 0.5
|
| 114 |
+
soft_targets: false
|
| 115 |
+
labels: null
|
| 116 |
+
batch_size: ${batch_size}
|
| 117 |
+
shuffle: false
|
| 118 |
+
seq_eval_mode: true
|
| 119 |
+
num_workers: ${num_workers}
|
| 120 |
+
validation_mode: true
|
| 121 |
+
use_lhotse: false
|
| 122 |
+
use_bucketing: false
|
| 123 |
+
drop_last: false
|
| 124 |
+
pin_memory: true
|
| 125 |
+
window_stride: 0.3125
|
| 126 |
+
subsampling_factor: 1
|
| 127 |
+
sortformer_modules:
|
| 128 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 129 |
+
num_spks: ${model.max_num_of_spks}
|
| 130 |
+
dropout_rate: 0.1
|
| 131 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 132 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
encoder:
|
| 135 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 136 |
+
subsampling_factor: 1
|
| 137 |
+
transformer_encoder:
|
| 138 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 139 |
+
num_layers: 18
|
| 140 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 141 |
+
inner_size: 768
|
| 142 |
+
num_attention_heads: 8
|
| 143 |
+
attn_score_dropout: 0.5
|
| 144 |
+
attn_layer_dropout: 0.5
|
| 145 |
+
ffn_dropout: 0.5
|
| 146 |
+
hidden_act: relu
|
| 147 |
+
pre_ln: false
|
| 148 |
+
pre_ln_final_layer_norm: true
|
| 149 |
+
loss:
|
| 150 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 151 |
+
weight: null
|
| 152 |
+
reduction: mean
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
optim:
|
| 155 |
+
name: adamw
|
| 156 |
+
lr: ${model.lr}
|
| 157 |
+
betas:
|
| 158 |
+
- 0.9
|
| 159 |
+
- 0.98
|
| 160 |
+
weight_decay: 0.001
|
| 161 |
+
sched:
|
| 162 |
+
name: InverseSquareRootAnnealing
|
| 163 |
+
warmup_steps: 2500
|
| 164 |
+
warmup_ratio: null
|
| 165 |
+
min_lr: 1.0e-06
|
| 166 |
+
trainer:
|
| 167 |
+
devices: 8
|
| 168 |
+
accelerator: gpu
|
| 169 |
+
precision: bf16-mixed
|
| 170 |
+
max_epochs: -1
|
| 171 |
+
max_steps: 16000
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
strategy: ddp_find_unused_parameters_true
|
| 174 |
+
accumulate_grad_batches: 1
|
| 175 |
+
deterministic: false
|
| 176 |
+
enable_checkpointing: false
|
| 177 |
+
logger: false
|
| 178 |
+
log_every_n_steps: 1
|
| 179 |
+
val_check_interval: 2000
|
| 180 |
+
num_sanity_val_steps: 0
|
| 181 |
+
exp_manager:
|
| 182 |
+
use_datetime_version: false
|
| 183 |
+
exp_dir: ./openflam_sortformer_train
|
| 184 |
+
name: openflam_sortformer_train
|
| 185 |
+
resume_if_exists: true
|
| 186 |
+
resume_from_checkpoint: null
|
| 187 |
+
resume_ignore_no_checkpoint: true
|
| 188 |
+
create_tensorboard_logger: true
|
| 189 |
+
create_checkpoint_callback: true
|
| 190 |
+
create_wandb_logger: false
|
| 191 |
+
checkpoint_callback_params:
|
| 192 |
+
monitor: val_der
|
| 193 |
+
mode: min
|
| 194 |
+
save_top_k: 5
|
| 195 |
+
every_n_train_steps: 2000
|
| 196 |
+
every_n_epochs: 0
|
| 197 |
+
wandb_logger_kwargs:
|
| 198 |
+
resume: true
|
| 199 |
+
name: null
|
| 200 |
+
project: null
|
| 201 |
+
|
| 202 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 203 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 204 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
|
| 205 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
|
| 206 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 207 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 208 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 210 |
+
[NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 211 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
|
| 212 |
+
[NeMo I 2026-04-13 18:49:39 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
|
| 213 |
+
[NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
|
| 214 |
+
Parameter Group 0
|
| 215 |
+
amsgrad: False
|
| 216 |
+
betas: (0.9, 0.98)
|
| 217 |
+
capturable: False
|
| 218 |
+
decoupled_weight_decay: True
|
| 219 |
+
differentiable: False
|
| 220 |
+
eps: 1e-08
|
| 221 |
+
foreach: None
|
| 222 |
+
fused: None
|
| 223 |
+
lr: 2e-05
|
| 224 |
+
maximize: False
|
| 225 |
+
weight_decay: 0.001
|
| 226 |
+
)
|
| 227 |
+
[NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f3350a98bf0>"
|
| 228 |
+
will be used during training (effective maximum steps = 16000) -
|
| 229 |
+
Parameters :
|
| 230 |
+
(warmup_steps: 2500
|
| 231 |
+
warmup_ratio: null
|
| 232 |
+
min_lr: 1.0e-06
|
| 233 |
+
max_steps: 16000
|
| 234 |
+
)
|
| 235 |
+
[NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453306.
|
| 236 |
+
[NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.490022.
|
| 237 |
+
[NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.16418.
|
| 238 |
+
[NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533128.
|
| 239 |
+
[NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.190873.
|
| 240 |
+
[NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.0130994.
|
| 241 |
+
[NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.634412.
|
| 242 |
+
[NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732641.
|
| 243 |
+
[NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425518.
|
| 244 |
+
[NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947701.
|
| 245 |
+
[NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.5715954.
|
openflam_sortformer_train/nemo_log_globalrank-7_localrank-7.txt
ADDED
|
@@ -0,0 +1,245 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 18:49:22 openflam_sortformer_train:31] Hydra config: name: OpenFLAMSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
openflam_model_name: v1-base
|
| 10 |
+
openflam_pretrained: true
|
| 11 |
+
openflam_freeze_layers: 0
|
| 12 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 13 |
+
rttm_unit_10ms_frame_count: 31
|
| 14 |
+
der_collar: 0.25
|
| 15 |
+
der_ignore_overlap: true
|
| 16 |
+
model_defaults:
|
| 17 |
+
fc_d_model: 1024
|
| 18 |
+
tf_d_model: 192
|
| 19 |
+
train_ds:
|
| 20 |
+
manifest_filepath: null
|
| 21 |
+
sample_rate: ${model.sample_rate}
|
| 22 |
+
num_spks: ${model.max_num_of_spks}
|
| 23 |
+
session_len_sec: 45
|
| 24 |
+
shift_sec: 4
|
| 25 |
+
soft_label_thres: 0.5
|
| 26 |
+
soft_targets: false
|
| 27 |
+
labels: null
|
| 28 |
+
batch_size: ${batch_size}
|
| 29 |
+
shuffle: true
|
| 30 |
+
num_workers: ${num_workers}
|
| 31 |
+
validation_mode: false
|
| 32 |
+
use_hf_streaming: true
|
| 33 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 34 |
+
hf_configs:
|
| 35 |
+
- CHiME6
|
| 36 |
+
- Dipco
|
| 37 |
+
- ICSI
|
| 38 |
+
- M3SD
|
| 39 |
+
- NOTSOFAR
|
| 40 |
+
- aishell4
|
| 41 |
+
- aishell5
|
| 42 |
+
- alimeeting
|
| 43 |
+
- ami_ihm
|
| 44 |
+
- ami_sdm
|
| 45 |
+
- callhome
|
| 46 |
+
- msdwild
|
| 47 |
+
- voxconverse
|
| 48 |
+
hf_split: train
|
| 49 |
+
shuffle_seed: 42
|
| 50 |
+
shuffle_buffer_size: 4
|
| 51 |
+
prefetch_factor: 4
|
| 52 |
+
persistent_workers: true
|
| 53 |
+
prefetch_rows: 8
|
| 54 |
+
use_lhotse: false
|
| 55 |
+
use_bucketing: true
|
| 56 |
+
num_buckets: 10
|
| 57 |
+
bucket_duration_bins:
|
| 58 |
+
- 10
|
| 59 |
+
- 20
|
| 60 |
+
- 30
|
| 61 |
+
- 40
|
| 62 |
+
- 50
|
| 63 |
+
- 60
|
| 64 |
+
- 70
|
| 65 |
+
- 80
|
| 66 |
+
- 90
|
| 67 |
+
pin_memory: true
|
| 68 |
+
min_duration: 10
|
| 69 |
+
max_duration: 90
|
| 70 |
+
batch_duration: 400
|
| 71 |
+
quadratic_duration: 1200
|
| 72 |
+
bucket_buffer_size: 20000
|
| 73 |
+
window_stride: 0.3125
|
| 74 |
+
subsampling_factor: 1
|
| 75 |
+
validation_ds:
|
| 76 |
+
manifest_filepath: null
|
| 77 |
+
is_tarred: false
|
| 78 |
+
tarred_audio_filepaths: null
|
| 79 |
+
sample_rate: ${model.sample_rate}
|
| 80 |
+
num_spks: ${model.max_num_of_spks}
|
| 81 |
+
session_len_sec: 45
|
| 82 |
+
shift_sec: 4
|
| 83 |
+
soft_label_thres: 0.5
|
| 84 |
+
soft_targets: false
|
| 85 |
+
labels: null
|
| 86 |
+
batch_size: ${batch_size}
|
| 87 |
+
shuffle: false
|
| 88 |
+
num_workers: ${num_workers}
|
| 89 |
+
validation_mode: true
|
| 90 |
+
use_hf_streaming: true
|
| 91 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 92 |
+
hf_configs:
|
| 93 |
+
- alm_benchmark
|
| 94 |
+
hf_split: train
|
| 95 |
+
shuffle_seed: 42
|
| 96 |
+
shuffle_buffer_size: 100
|
| 97 |
+
prefetch_factor: 4
|
| 98 |
+
persistent_workers: true
|
| 99 |
+
prefetch_rows: 4
|
| 100 |
+
use_lhotse: false
|
| 101 |
+
use_bucketing: false
|
| 102 |
+
drop_last: false
|
| 103 |
+
pin_memory: true
|
| 104 |
+
window_stride: 0.3125
|
| 105 |
+
subsampling_factor: 1
|
| 106 |
+
test_ds:
|
| 107 |
+
manifest_filepath: null
|
| 108 |
+
is_tarred: false
|
| 109 |
+
tarred_audio_filepaths: null
|
| 110 |
+
sample_rate: ${model.sample_rate}
|
| 111 |
+
num_spks: ${model.max_num_of_spks}
|
| 112 |
+
session_len_sec: 90
|
| 113 |
+
soft_label_thres: 0.5
|
| 114 |
+
soft_targets: false
|
| 115 |
+
labels: null
|
| 116 |
+
batch_size: ${batch_size}
|
| 117 |
+
shuffle: false
|
| 118 |
+
seq_eval_mode: true
|
| 119 |
+
num_workers: ${num_workers}
|
| 120 |
+
validation_mode: true
|
| 121 |
+
use_lhotse: false
|
| 122 |
+
use_bucketing: false
|
| 123 |
+
drop_last: false
|
| 124 |
+
pin_memory: true
|
| 125 |
+
window_stride: 0.3125
|
| 126 |
+
subsampling_factor: 1
|
| 127 |
+
sortformer_modules:
|
| 128 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 129 |
+
num_spks: ${model.max_num_of_spks}
|
| 130 |
+
dropout_rate: 0.1
|
| 131 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 132 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
encoder:
|
| 135 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 136 |
+
subsampling_factor: 1
|
| 137 |
+
transformer_encoder:
|
| 138 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 139 |
+
num_layers: 18
|
| 140 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 141 |
+
inner_size: 768
|
| 142 |
+
num_attention_heads: 8
|
| 143 |
+
attn_score_dropout: 0.5
|
| 144 |
+
attn_layer_dropout: 0.5
|
| 145 |
+
ffn_dropout: 0.5
|
| 146 |
+
hidden_act: relu
|
| 147 |
+
pre_ln: false
|
| 148 |
+
pre_ln_final_layer_norm: true
|
| 149 |
+
loss:
|
| 150 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 151 |
+
weight: null
|
| 152 |
+
reduction: mean
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
optim:
|
| 155 |
+
name: adamw
|
| 156 |
+
lr: ${model.lr}
|
| 157 |
+
betas:
|
| 158 |
+
- 0.9
|
| 159 |
+
- 0.98
|
| 160 |
+
weight_decay: 0.001
|
| 161 |
+
sched:
|
| 162 |
+
name: InverseSquareRootAnnealing
|
| 163 |
+
warmup_steps: 2500
|
| 164 |
+
warmup_ratio: null
|
| 165 |
+
min_lr: 1.0e-06
|
| 166 |
+
trainer:
|
| 167 |
+
devices: 8
|
| 168 |
+
accelerator: gpu
|
| 169 |
+
precision: bf16-mixed
|
| 170 |
+
max_epochs: -1
|
| 171 |
+
max_steps: 16000
|
| 172 |
+
num_nodes: 1
|
| 173 |
+
strategy: ddp_find_unused_parameters_true
|
| 174 |
+
accumulate_grad_batches: 1
|
| 175 |
+
deterministic: false
|
| 176 |
+
enable_checkpointing: false
|
| 177 |
+
logger: false
|
| 178 |
+
log_every_n_steps: 1
|
| 179 |
+
val_check_interval: 2000
|
| 180 |
+
num_sanity_val_steps: 0
|
| 181 |
+
exp_manager:
|
| 182 |
+
use_datetime_version: false
|
| 183 |
+
exp_dir: ./openflam_sortformer_train
|
| 184 |
+
name: openflam_sortformer_train
|
| 185 |
+
resume_if_exists: true
|
| 186 |
+
resume_from_checkpoint: null
|
| 187 |
+
resume_ignore_no_checkpoint: true
|
| 188 |
+
create_tensorboard_logger: true
|
| 189 |
+
create_checkpoint_callback: true
|
| 190 |
+
create_wandb_logger: false
|
| 191 |
+
checkpoint_callback_params:
|
| 192 |
+
monitor: val_der
|
| 193 |
+
mode: min
|
| 194 |
+
save_top_k: 5
|
| 195 |
+
every_n_train_steps: 2000
|
| 196 |
+
every_n_epochs: 0
|
| 197 |
+
wandb_logger_kwargs:
|
| 198 |
+
resume: true
|
| 199 |
+
name: null
|
| 200 |
+
project: null
|
| 201 |
+
|
| 202 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 203 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :openflam_sortformer_train/openflam_sortformer_train/checkpoints. Training from scratch.
|
| 204 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:655] Experiments will be logged at openflam_sortformer_train/openflam_sortformer_train
|
| 205 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:1262] TensorboardLogger has been set up
|
| 206 |
+
[NeMo W 2026-04-13 18:49:22 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 207 |
+
[NeMo I 2026-04-13 18:49:22 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 208 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:371] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 210 |
+
[NeMo W 2026-04-13 18:49:27 openflam_sortformer_model:284] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.3125, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 211 |
+
[NeMo I 2026-04-13 18:49:27 openflam_sortformer_model:132] Loading OpenFLAM model: v1-base, pretrained=True
|
| 212 |
+
[NeMo I 2026-04-13 18:49:38 openflam_sortformer_model:182] OpenFLAM HTSAT encoder: 4 stages total, first 0 frozen, last 4 trainable
|
| 213 |
+
[NeMo I 2026-04-13 18:49:43 modelPT:830] Optimizer config = AdamW (
|
| 214 |
+
Parameter Group 0
|
| 215 |
+
amsgrad: False
|
| 216 |
+
betas: (0.9, 0.98)
|
| 217 |
+
capturable: False
|
| 218 |
+
decoupled_weight_decay: True
|
| 219 |
+
differentiable: False
|
| 220 |
+
eps: 1e-08
|
| 221 |
+
foreach: None
|
| 222 |
+
fused: None
|
| 223 |
+
lr: 2e-05
|
| 224 |
+
maximize: False
|
| 225 |
+
weight_decay: 0.001
|
| 226 |
+
)
|
| 227 |
+
[NeMo I 2026-04-13 18:49:43 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7f82856aaba0>"
|
| 228 |
+
will be used during training (effective maximum steps = 16000) -
|
| 229 |
+
Parameters :
|
| 230 |
+
(warmup_steps: 2500
|
| 231 |
+
warmup_ratio: null
|
| 232 |
+
min_lr: 1.0e-06
|
| 233 |
+
max_steps: 16000
|
| 234 |
+
)
|
| 235 |
+
[NeMo I 2026-04-13 20:13:39 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776111219.4453285.
|
| 236 |
+
[NeMo I 2026-04-13 21:38:50 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116330.4900186.
|
| 237 |
+
[NeMo I 2026-04-13 21:38:52 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776116332.1641943.
|
| 238 |
+
[NeMo I 2026-04-13 23:06:23 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121583.5533047.
|
| 239 |
+
[NeMo I 2026-04-13 23:06:25 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776121585.1908693.
|
| 240 |
+
[NeMo I 2026-04-14 00:34:32 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126872.013106.
|
| 241 |
+
[NeMo I 2026-04-14 00:34:33 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776126873.6344168.
|
| 242 |
+
[NeMo I 2026-04-14 02:01:04 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132064.0732667.
|
| 243 |
+
[NeMo I 2026-04-14 02:01:05 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776132065.7425685.
|
| 244 |
+
[NeMo I 2026-04-14 03:26:54 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137214.8947742.
|
| 245 |
+
[NeMo I 2026-04-14 03:26:56 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776137216.571593.
|
openflam_sortformer_train/version_0/events.out.tfevents.1776106183.aa77be2546cc.6641.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b012681cc9b2db7e02c55703a59a4e2907b43281c8fc25798c0fc1cf62e14bb
|
| 3 |
+
size 7667179
|
openflam_sortformer_train/version_0/hparams.yaml
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cfg:
|
| 2 |
+
sample_rate: 48000
|
| 3 |
+
pil_weight: 0.5
|
| 4 |
+
ats_weight: 0.5
|
| 5 |
+
max_num_of_spks: 4
|
| 6 |
+
openflam_model_name: v1-base
|
| 7 |
+
openflam_pretrained: true
|
| 8 |
+
openflam_freeze_layers: 0
|
| 9 |
+
openflam_ckpt_cache_dir: /tmp/openflam
|
| 10 |
+
rttm_unit_10ms_frame_count: 31
|
| 11 |
+
der_collar: 0.25
|
| 12 |
+
der_ignore_overlap: true
|
| 13 |
+
model_defaults:
|
| 14 |
+
fc_d_model: 1024
|
| 15 |
+
tf_d_model: 192
|
| 16 |
+
train_ds:
|
| 17 |
+
manifest_filepath: null
|
| 18 |
+
sample_rate: 48000
|
| 19 |
+
num_spks: 4
|
| 20 |
+
session_len_sec: 45
|
| 21 |
+
shift_sec: 4
|
| 22 |
+
soft_label_thres: 0.5
|
| 23 |
+
soft_targets: false
|
| 24 |
+
labels: null
|
| 25 |
+
batch_size: 24
|
| 26 |
+
shuffle: true
|
| 27 |
+
num_workers: 1
|
| 28 |
+
validation_mode: false
|
| 29 |
+
use_hf_streaming: true
|
| 30 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 31 |
+
hf_configs:
|
| 32 |
+
- CHiME6
|
| 33 |
+
- Dipco
|
| 34 |
+
- ICSI
|
| 35 |
+
- M3SD
|
| 36 |
+
- NOTSOFAR
|
| 37 |
+
- aishell4
|
| 38 |
+
- aishell5
|
| 39 |
+
- alimeeting
|
| 40 |
+
- ami_ihm
|
| 41 |
+
- ami_sdm
|
| 42 |
+
- callhome
|
| 43 |
+
- msdwild
|
| 44 |
+
- voxconverse
|
| 45 |
+
hf_split: train
|
| 46 |
+
shuffle_seed: 42
|
| 47 |
+
shuffle_buffer_size: 4
|
| 48 |
+
prefetch_factor: 4
|
| 49 |
+
persistent_workers: true
|
| 50 |
+
prefetch_rows: 8
|
| 51 |
+
use_lhotse: false
|
| 52 |
+
use_bucketing: true
|
| 53 |
+
num_buckets: 10
|
| 54 |
+
bucket_duration_bins:
|
| 55 |
+
- 10
|
| 56 |
+
- 20
|
| 57 |
+
- 30
|
| 58 |
+
- 40
|
| 59 |
+
- 50
|
| 60 |
+
- 60
|
| 61 |
+
- 70
|
| 62 |
+
- 80
|
| 63 |
+
- 90
|
| 64 |
+
pin_memory: true
|
| 65 |
+
min_duration: 10
|
| 66 |
+
max_duration: 90
|
| 67 |
+
batch_duration: 400
|
| 68 |
+
quadratic_duration: 1200
|
| 69 |
+
bucket_buffer_size: 20000
|
| 70 |
+
window_stride: 0.3125
|
| 71 |
+
subsampling_factor: 1
|
| 72 |
+
validation_ds:
|
| 73 |
+
manifest_filepath: null
|
| 74 |
+
is_tarred: false
|
| 75 |
+
tarred_audio_filepaths: null
|
| 76 |
+
sample_rate: 48000
|
| 77 |
+
num_spks: 4
|
| 78 |
+
session_len_sec: 45
|
| 79 |
+
shift_sec: 4
|
| 80 |
+
soft_label_thres: 0.5
|
| 81 |
+
soft_targets: false
|
| 82 |
+
labels: null
|
| 83 |
+
batch_size: 24
|
| 84 |
+
shuffle: false
|
| 85 |
+
num_workers: 1
|
| 86 |
+
validation_mode: true
|
| 87 |
+
use_hf_streaming: true
|
| 88 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 89 |
+
hf_configs:
|
| 90 |
+
- alm_benchmark
|
| 91 |
+
hf_split: train
|
| 92 |
+
shuffle_seed: 42
|
| 93 |
+
shuffle_buffer_size: 100
|
| 94 |
+
prefetch_factor: 4
|
| 95 |
+
persistent_workers: true
|
| 96 |
+
prefetch_rows: 4
|
| 97 |
+
use_lhotse: false
|
| 98 |
+
use_bucketing: false
|
| 99 |
+
drop_last: false
|
| 100 |
+
pin_memory: true
|
| 101 |
+
window_stride: 0.3125
|
| 102 |
+
subsampling_factor: 1
|
| 103 |
+
test_ds:
|
| 104 |
+
manifest_filepath: null
|
| 105 |
+
is_tarred: false
|
| 106 |
+
tarred_audio_filepaths: null
|
| 107 |
+
sample_rate: 48000
|
| 108 |
+
num_spks: 4
|
| 109 |
+
session_len_sec: 90
|
| 110 |
+
soft_label_thres: 0.5
|
| 111 |
+
soft_targets: false
|
| 112 |
+
labels: null
|
| 113 |
+
batch_size: 24
|
| 114 |
+
shuffle: false
|
| 115 |
+
seq_eval_mode: true
|
| 116 |
+
num_workers: 1
|
| 117 |
+
validation_mode: true
|
| 118 |
+
use_lhotse: false
|
| 119 |
+
use_bucketing: false
|
| 120 |
+
drop_last: false
|
| 121 |
+
pin_memory: true
|
| 122 |
+
window_stride: 0.3125
|
| 123 |
+
subsampling_factor: 1
|
| 124 |
+
sortformer_modules:
|
| 125 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 126 |
+
num_spks: 4
|
| 127 |
+
dropout_rate: 0.1
|
| 128 |
+
fc_d_model: 1024
|
| 129 |
+
tf_d_model: 192
|
| 130 |
+
subsampling_factor: 1
|
| 131 |
+
encoder:
|
| 132 |
+
d_model: 1024
|
| 133 |
+
subsampling_factor: 1
|
| 134 |
+
transformer_encoder:
|
| 135 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 136 |
+
num_layers: 18
|
| 137 |
+
hidden_size: 192
|
| 138 |
+
inner_size: 768
|
| 139 |
+
num_attention_heads: 8
|
| 140 |
+
attn_score_dropout: 0.5
|
| 141 |
+
attn_layer_dropout: 0.5
|
| 142 |
+
ffn_dropout: 0.5
|
| 143 |
+
hidden_act: relu
|
| 144 |
+
pre_ln: false
|
| 145 |
+
pre_ln_final_layer_norm: true
|
| 146 |
+
loss:
|
| 147 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 148 |
+
weight: null
|
| 149 |
+
reduction: mean
|
| 150 |
+
lr: 2.0e-05
|
| 151 |
+
optim:
|
| 152 |
+
name: adamw
|
| 153 |
+
lr: 2.0e-05
|
| 154 |
+
betas:
|
| 155 |
+
- 0.9
|
| 156 |
+
- 0.98
|
| 157 |
+
weight_decay: 0.001
|
| 158 |
+
sched:
|
| 159 |
+
name: InverseSquareRootAnnealing
|
| 160 |
+
warmup_steps: 2500
|
| 161 |
+
warmup_ratio: null
|
| 162 |
+
min_lr: 1.0e-06
|