Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1314-epoch=0-last.ckpt +3 -0
- peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1314-epoch=0.ckpt +3 -0
- peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1348-epoch=0.ckpt +3 -0
- peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1382-epoch=0.ckpt +3 -0
- peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1458-epoch=0.ckpt +3 -0
- peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1609-epoch=0.ckpt +3 -0
- peav_sortformer_train/checkpoints/peav_sortformer_train.nemo +3 -0
- peav_sortformer_train/cmd-args.log +1 -0
- peav_sortformer_train/git-info.log +1 -0
- peav_sortformer_train/lightning_logs.txt +23 -0
- peav_sortformer_train/nemo_error_log.txt +8 -0
- peav_sortformer_train/nemo_log_globalrank-0_localrank-0.txt +254 -0
- peav_sortformer_train/nemo_log_globalrank-1_localrank-1.txt +248 -0
- peav_sortformer_train/nemo_log_globalrank-2_localrank-2.txt +248 -0
- peav_sortformer_train/nemo_log_globalrank-3_localrank-3.txt +248 -0
- peav_sortformer_train/nemo_log_globalrank-4_localrank-4.txt +248 -0
- peav_sortformer_train/nemo_log_globalrank-5_localrank-5.txt +248 -0
- peav_sortformer_train/nemo_log_globalrank-6_localrank-6.txt +248 -0
- peav_sortformer_train/nemo_log_globalrank-7_localrank-7.txt +248 -0
- peav_sortformer_train/version_0/events.out.tfevents.1776078731.260d0e88e8f5.93828.0 +3 -0
- peav_sortformer_train/version_0/hparams.yaml +161 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
peav_sortformer_train/checkpoints/peav_sortformer_train.nemo filter=lfs diff=lfs merge=lfs -text
|
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1314-epoch=0-last.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:168fcc556f339689efb16b22d8d6222a7eb76e971f0a19d81d9599f0b5c5ef96
|
| 3 |
+
size 1251559602
|
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1314-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ced2d1f4a2f261033d596a69d55b7097eb95c48ebee8517dc46d7fc89df98d3
|
| 3 |
+
size 1251559602
|
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1348-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52aecebedd98ce1bb126732390254bd12caaaed91698800be432849bcc370e0e
|
| 3 |
+
size 1251559602
|
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1382-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2345c2778a51324a64f7bafd924f804da4bf2918abb17186531f0d1f65da2333
|
| 3 |
+
size 1251559602
|
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1458-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab1023cbc9cad4e83bd782fc2e6bd9429facf9a4d18b7a2a88274b0bd7039ff9
|
| 3 |
+
size 1251559027
|
peav_sortformer_train/checkpoints/peav_sortformer_train--val_der=0.1609-epoch=0.ckpt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:873ebc0a3928e07d4e42efc2c42c772e9a3a76de3c12619de3c57030316c2add
|
| 3 |
+
size 1251559474
|
peav_sortformer_train/checkpoints/peav_sortformer_train.nemo
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fbfcc298079572797332202d997c1a9db7a969b0b71a414903cc0965e8c31fff
|
| 3 |
+
size 500469760
|
peav_sortformer_train/cmd-args.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
scripts/peav_sortformer_train.py --config-path=../configs --config-name=peav_sortformer_4spk.yaml exp_manager.name=peav_sortformer_train exp_manager.exp_dir=./peav_sortformer_train
|
peav_sortformer_train/git-info.log
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
commit hash: b1e2e995a240deef69a6b3a9e40693059b976a6c
|
peav_sortformer_train/lightning_logs.txt
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
|
| 2 |
+
|
| 3 |
+
| Name | Type | Params | Mode
|
| 4 |
+
---------------------------------------------------------------------
|
| 5 |
+
0 | dac_vae | DacEncoderVAE | 27.7 M | train
|
| 6 |
+
1 | data_proj | Linear | 99.1 K | train
|
| 7 |
+
2 | encoder | PEAVEncoderWrapper | 89.1 M | train
|
| 8 |
+
3 | sortformer_modules | SortformerModules | 187 K | train
|
| 9 |
+
4 | transformer_encoder | TransformerEncoder | 8.0 M | train
|
| 10 |
+
5 | loss | BCELoss | 0 | train
|
| 11 |
+
6 | _accuracy_test | MultiBinaryAccuracy | 0 | train
|
| 12 |
+
7 | _accuracy_train | MultiBinaryAccuracy | 0 | train
|
| 13 |
+
8 | _accuracy_valid | MultiBinaryAccuracy | 0 | train
|
| 14 |
+
9 | _accuracy_test_ats | MultiBinaryAccuracy | 0 | train
|
| 15 |
+
10 | _accuracy_train_ats | MultiBinaryAccuracy | 0 | train
|
| 16 |
+
11 | _accuracy_valid_ats | MultiBinaryAccuracy | 0 | train
|
| 17 |
+
---------------------------------------------------------------------
|
| 18 |
+
93.8 M Trainable params
|
| 19 |
+
31.2 M Non-trainable params
|
| 20 |
+
125 M Total params
|
| 21 |
+
500.237 Total estimated model params size (MB)
|
| 22 |
+
545 Modules in train mode
|
| 23 |
+
0 Modules in eval mode
|
peav_sortformer_train/nemo_error_log.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo W 2026-04-13 11:11:25 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 2 |
+
[NeMo W 2026-04-13 11:11:25 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 3 |
+
[NeMo W 2026-04-13 11:11:25 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 4 |
+
[NeMo W 2026-04-13 11:11:25 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 5 |
+
[NeMo W 2026-04-13 11:12:11 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
|
| 6 |
+
|
| 7 |
+
[NeMo W 2026-04-13 11:12:54 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
|
| 8 |
+
|
peav_sortformer_train/nemo_log_globalrank-0_localrank-0.txt
ADDED
|
@@ -0,0 +1,254 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 11:11:25 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 10 |
+
peav_pretrained: true
|
| 11 |
+
peav_freeze_layers: 0
|
| 12 |
+
rttm_unit_10ms_frame_count: 4
|
| 13 |
+
der_collar: 0.25
|
| 14 |
+
der_ignore_overlap: true
|
| 15 |
+
model_defaults:
|
| 16 |
+
fc_d_model: 768
|
| 17 |
+
tf_d_model: 192
|
| 18 |
+
train_ds:
|
| 19 |
+
manifest_filepath: null
|
| 20 |
+
sample_rate: ${model.sample_rate}
|
| 21 |
+
num_spks: ${model.max_num_of_spks}
|
| 22 |
+
session_len_sec: 45
|
| 23 |
+
shift_sec: 4
|
| 24 |
+
soft_label_thres: 0.5
|
| 25 |
+
soft_targets: false
|
| 26 |
+
labels: null
|
| 27 |
+
batch_size: ${batch_size}
|
| 28 |
+
shuffle: true
|
| 29 |
+
num_workers: ${num_workers}
|
| 30 |
+
validation_mode: false
|
| 31 |
+
use_hf_streaming: true
|
| 32 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 33 |
+
hf_configs:
|
| 34 |
+
- CHiME6
|
| 35 |
+
- Dipco
|
| 36 |
+
- ICSI
|
| 37 |
+
- M3SD
|
| 38 |
+
- NOTSOFAR
|
| 39 |
+
- aishell4
|
| 40 |
+
- aishell5
|
| 41 |
+
- alimeeting
|
| 42 |
+
- ami_ihm
|
| 43 |
+
- ami_sdm
|
| 44 |
+
- callhome
|
| 45 |
+
- msdwild
|
| 46 |
+
- voxconverse
|
| 47 |
+
hf_split: train
|
| 48 |
+
shuffle_seed: 42
|
| 49 |
+
shuffle_buffer_size: 4
|
| 50 |
+
prefetch_factor: 4
|
| 51 |
+
persistent_workers: true
|
| 52 |
+
prefetch_rows: 8
|
| 53 |
+
use_lhotse: false
|
| 54 |
+
use_bucketing: true
|
| 55 |
+
num_buckets: 10
|
| 56 |
+
bucket_duration_bins:
|
| 57 |
+
- 10
|
| 58 |
+
- 20
|
| 59 |
+
- 30
|
| 60 |
+
- 40
|
| 61 |
+
- 50
|
| 62 |
+
- 60
|
| 63 |
+
- 70
|
| 64 |
+
- 80
|
| 65 |
+
- 90
|
| 66 |
+
pin_memory: true
|
| 67 |
+
min_duration: 10
|
| 68 |
+
max_duration: 90
|
| 69 |
+
batch_duration: 400
|
| 70 |
+
quadratic_duration: 1200
|
| 71 |
+
bucket_buffer_size: 20000
|
| 72 |
+
window_stride: 0.04
|
| 73 |
+
subsampling_factor: 1
|
| 74 |
+
validation_ds:
|
| 75 |
+
manifest_filepath: null
|
| 76 |
+
is_tarred: false
|
| 77 |
+
tarred_audio_filepaths: null
|
| 78 |
+
sample_rate: ${model.sample_rate}
|
| 79 |
+
num_spks: ${model.max_num_of_spks}
|
| 80 |
+
session_len_sec: 45
|
| 81 |
+
shift_sec: 4
|
| 82 |
+
soft_label_thres: 0.5
|
| 83 |
+
soft_targets: false
|
| 84 |
+
labels: null
|
| 85 |
+
batch_size: ${batch_size}
|
| 86 |
+
shuffle: false
|
| 87 |
+
num_workers: ${num_workers}
|
| 88 |
+
validation_mode: true
|
| 89 |
+
use_hf_streaming: true
|
| 90 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 91 |
+
hf_configs:
|
| 92 |
+
- alm_benchmark
|
| 93 |
+
hf_split: train
|
| 94 |
+
shuffle_seed: 42
|
| 95 |
+
shuffle_buffer_size: 100
|
| 96 |
+
prefetch_factor: 4
|
| 97 |
+
persistent_workers: true
|
| 98 |
+
prefetch_rows: 4
|
| 99 |
+
use_lhotse: false
|
| 100 |
+
use_bucketing: false
|
| 101 |
+
drop_last: false
|
| 102 |
+
pin_memory: true
|
| 103 |
+
window_stride: 0.04
|
| 104 |
+
subsampling_factor: 1
|
| 105 |
+
test_ds:
|
| 106 |
+
manifest_filepath: null
|
| 107 |
+
is_tarred: false
|
| 108 |
+
tarred_audio_filepaths: null
|
| 109 |
+
sample_rate: ${model.sample_rate}
|
| 110 |
+
num_spks: ${model.max_num_of_spks}
|
| 111 |
+
session_len_sec: 90
|
| 112 |
+
soft_label_thres: 0.5
|
| 113 |
+
soft_targets: false
|
| 114 |
+
labels: null
|
| 115 |
+
batch_size: ${batch_size}
|
| 116 |
+
shuffle: false
|
| 117 |
+
seq_eval_mode: true
|
| 118 |
+
num_workers: ${num_workers}
|
| 119 |
+
validation_mode: true
|
| 120 |
+
use_lhotse: false
|
| 121 |
+
use_bucketing: false
|
| 122 |
+
drop_last: false
|
| 123 |
+
pin_memory: true
|
| 124 |
+
window_stride: 0.04
|
| 125 |
+
subsampling_factor: 1
|
| 126 |
+
sortformer_modules:
|
| 127 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 128 |
+
num_spks: ${model.max_num_of_spks}
|
| 129 |
+
dropout_rate: 0.1
|
| 130 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 131 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
encoder:
|
| 134 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 135 |
+
subsampling_factor: 1
|
| 136 |
+
transformer_encoder:
|
| 137 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 138 |
+
num_layers: 18
|
| 139 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 140 |
+
inner_size: 768
|
| 141 |
+
num_attention_heads: 8
|
| 142 |
+
attn_score_dropout: 0.5
|
| 143 |
+
attn_layer_dropout: 0.5
|
| 144 |
+
ffn_dropout: 0.5
|
| 145 |
+
hidden_act: relu
|
| 146 |
+
pre_ln: false
|
| 147 |
+
pre_ln_final_layer_norm: true
|
| 148 |
+
loss:
|
| 149 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 150 |
+
weight: null
|
| 151 |
+
reduction: mean
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
optim:
|
| 154 |
+
name: adamw
|
| 155 |
+
lr: ${model.lr}
|
| 156 |
+
betas:
|
| 157 |
+
- 0.9
|
| 158 |
+
- 0.98
|
| 159 |
+
weight_decay: 0.001
|
| 160 |
+
sched:
|
| 161 |
+
name: InverseSquareRootAnnealing
|
| 162 |
+
warmup_steps: 2500
|
| 163 |
+
warmup_ratio: null
|
| 164 |
+
min_lr: 1.0e-06
|
| 165 |
+
trainer:
|
| 166 |
+
devices: 8
|
| 167 |
+
accelerator: gpu
|
| 168 |
+
precision: bf16-mixed
|
| 169 |
+
max_epochs: -1
|
| 170 |
+
max_steps: 16000
|
| 171 |
+
num_nodes: 1
|
| 172 |
+
strategy: ddp_find_unused_parameters_true
|
| 173 |
+
accumulate_grad_batches: 1
|
| 174 |
+
deterministic: false
|
| 175 |
+
enable_checkpointing: false
|
| 176 |
+
logger: false
|
| 177 |
+
log_every_n_steps: 1
|
| 178 |
+
val_check_interval: 2000
|
| 179 |
+
num_sanity_val_steps: 0
|
| 180 |
+
exp_manager:
|
| 181 |
+
use_datetime_version: false
|
| 182 |
+
exp_dir: ./peav_sortformer_train
|
| 183 |
+
name: peav_sortformer_train
|
| 184 |
+
resume_if_exists: true
|
| 185 |
+
resume_from_checkpoint: null
|
| 186 |
+
resume_ignore_no_checkpoint: true
|
| 187 |
+
create_tensorboard_logger: true
|
| 188 |
+
create_checkpoint_callback: true
|
| 189 |
+
create_wandb_logger: false
|
| 190 |
+
checkpoint_callback_params:
|
| 191 |
+
monitor: val_der
|
| 192 |
+
mode: min
|
| 193 |
+
save_top_k: 5
|
| 194 |
+
every_n_train_steps: 2000
|
| 195 |
+
every_n_epochs: 0
|
| 196 |
+
wandb_logger_kwargs:
|
| 197 |
+
resume: true
|
| 198 |
+
name: null
|
| 199 |
+
project: null
|
| 200 |
+
|
| 201 |
+
[NeMo I 2026-04-13 11:11:25 exp_manager:594] ExpManager schema
|
| 202 |
+
[NeMo I 2026-04-13 11:11:25 exp_manager:595] {'explicit_log_dir': None, 'exp_dir': None, 'name': None, 'version': None, 'use_datetime_version': True, 'resume_if_exists': False, 'resume_past_end': False, 'resume_ignore_no_checkpoint': False, 'resume_from_checkpoint': None, 'create_tensorboard_logger': True, 'summary_writer_kwargs': None, 'create_wandb_logger': False, 'wandb_logger_kwargs': None, 'create_mlflow_logger': False, 'mlflow_logger_kwargs': {'experiment_name': None, 'run_name': None, 'tracking_uri': None, 'tags': None, 'save_dir': './mlruns', 'prefix': '', 'artifact_location': None, 'run_id': None, 'log_model': False}, 'create_dllogger_logger': False, 'dllogger_logger_kwargs': {'verbose': False, 'stdout': False, 'json_file': './dllogger.json'}, 'create_clearml_logger': False, 'clearml_logger_kwargs': {'project': None, 'task': None, 'connect_pytorch': False, 'model_name': None, 'tags': None, 'log_model': False, 'log_cfg': False, 'log_metrics': False}, 'create_neptune_logger': False, 'neptune_logger_kwargs': None, 'create_checkpoint_callback': True, 'checkpoint_callback_params': {'filepath': None, 'dirpath': None, 'filename': None, 'monitor': 'val_loss', 'verbose': True, 'save_last': True, 'save_top_k': 3, 'save_weights_only': False, 'mode': 'min', 'auto_insert_metric_name': True, 'every_n_epochs': 1, 'every_n_train_steps': None, 'train_time_interval': None, 'prefix': None, 'postfix': '.nemo', 'save_best_model': False, 'always_save_nemo': False, 'save_nemo_on_train_end': True, 'model_parallel_size': None, 'save_on_train_epoch_end': False, 'async_save': False, 'save_last_n_optim_states': -1}, 'create_early_stopping_callback': False, 'create_ipl_epoch_stopper_callback': False, 'early_stopping_callback_params': {'monitor': 'val_loss', 'mode': 'min', 'min_delta': 0.001, 'patience': 10, 'verbose': True, 'strict': True, 'check_finite': True, 'stopping_threshold': None, 'divergence_threshold': None, 'check_on_train_epoch_end': None, 'log_rank_zero_only': False}, 'ipl_epoch_stopper_callback_params': {'enable_stop': True, 'stop_every_n_epochs': 1}, 'create_preemption_callback': True, 'files_to_copy': None, 'log_step_timing': True, 'log_delta_step_timing': False, 'step_timing_kwargs': {'reduction': 'mean', 'sync_cuda': False, 'buffer_size': 1}, 'log_local_rank_0_only': False, 'log_global_rank_0_only': False, 'disable_validation_on_resume': True, 'ema': {'enable': False, 'decay': 0.999, 'cpu_offload': False, 'validate_original_weights': False, 'every_n_steps': 1}, 'max_time_per_run': None, 'seconds_to_sleep': 5.0, 'create_straggler_detection_callback': False, 'straggler_detection_params': {'report_time_interval': 300.0, 'calc_relative_gpu_perf': True, 'calc_individual_gpu_perf': True, 'num_gpu_perf_scores_to_log': 5, 'gpu_relative_perf_threshold': 0.7, 'gpu_individual_perf_threshold': 0.7, 'stop_if_detected': False}, 'create_fault_tolerance_callback': False, 'fault_tolerance': {'workload_check_interval': 5.0, 'initial_rank_heartbeat_timeout': 3600.0, 'rank_heartbeat_timeout': 2700.0, 'calculate_timeouts': True, 'safety_factor': 5.0, 'rank_termination_signal': <Signals.SIGKILL: 9>, 'log_level': 'INFO', 'max_rank_restarts': 0, 'max_subsequent_job_failures': 0, 'additional_ft_launcher_args': '', 'simulated_fault': None}, 'log_tflops_per_sec_per_gpu': True}
|
| 203 |
+
[NeMo W 2026-04-13 11:11:25 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 204 |
+
[NeMo W 2026-04-13 11:11:25 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 205 |
+
[NeMo I 2026-04-13 11:11:25 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
|
| 206 |
+
[NeMo I 2026-04-13 11:11:25 exp_manager:1262] TensorboardLogger has been set up
|
| 207 |
+
[NeMo W 2026-04-13 11:11:25 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 208 |
+
[NeMo I 2026-04-13 11:11:25 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 209 |
+
[NeMo I 2026-04-13 11:11:25 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 210 |
+
[NeMo I 2026-04-13 11:11:25 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 211 |
+
[NeMo W 2026-04-13 11:11:25 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 212 |
+
[NeMo I 2026-04-13 11:11:25 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
|
| 213 |
+
[NeMo I 2026-04-13 11:11:30 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
|
| 214 |
+
[NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
|
| 215 |
+
Parameter Group 0
|
| 216 |
+
amsgrad: False
|
| 217 |
+
betas: (0.9, 0.98)
|
| 218 |
+
capturable: False
|
| 219 |
+
decoupled_weight_decay: True
|
| 220 |
+
differentiable: False
|
| 221 |
+
eps: 1e-08
|
| 222 |
+
foreach: None
|
| 223 |
+
fused: None
|
| 224 |
+
lr: 2e-05
|
| 225 |
+
maximize: False
|
| 226 |
+
weight_decay: 0.001
|
| 227 |
+
)
|
| 228 |
+
[NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7828e016d130>"
|
| 229 |
+
will be used during training (effective maximum steps = 16000) -
|
| 230 |
+
Parameters :
|
| 231 |
+
(warmup_steps: 2500
|
| 232 |
+
warmup_ratio: null
|
| 233 |
+
min_lr: 1.0e-06
|
| 234 |
+
max_steps: 16000
|
| 235 |
+
)
|
| 236 |
+
[NeMo W 2026-04-13 11:12:11 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
|
| 237 |
+
|
| 238 |
+
[NeMo W 2026-04-13 11:12:54 nemo_logging:364] /venv/main/lib/python3.12/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:424: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=15` in the `DataLoader` to improve performance.
|
| 239 |
+
|
| 240 |
+
[NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118162.
|
| 241 |
+
[NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.535194.
|
| 242 |
+
[NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395654.
|
| 243 |
+
[NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.733094.
|
| 244 |
+
[NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493985.
|
| 245 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072822.
|
| 246 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771407.
|
| 247 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.213656.
|
| 248 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.942046.
|
| 249 |
+
[NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.637489.
|
| 250 |
+
[NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543324.
|
| 251 |
+
[NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.729116.
|
| 252 |
+
[NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.408545.
|
| 253 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822026.
|
| 254 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9141016.
|
peav_sortformer_train/nemo_log_globalrank-1_localrank-1.txt
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 10 |
+
peav_pretrained: true
|
| 11 |
+
peav_freeze_layers: 0
|
| 12 |
+
rttm_unit_10ms_frame_count: 4
|
| 13 |
+
der_collar: 0.25
|
| 14 |
+
der_ignore_overlap: true
|
| 15 |
+
model_defaults:
|
| 16 |
+
fc_d_model: 768
|
| 17 |
+
tf_d_model: 192
|
| 18 |
+
train_ds:
|
| 19 |
+
manifest_filepath: null
|
| 20 |
+
sample_rate: ${model.sample_rate}
|
| 21 |
+
num_spks: ${model.max_num_of_spks}
|
| 22 |
+
session_len_sec: 45
|
| 23 |
+
shift_sec: 4
|
| 24 |
+
soft_label_thres: 0.5
|
| 25 |
+
soft_targets: false
|
| 26 |
+
labels: null
|
| 27 |
+
batch_size: ${batch_size}
|
| 28 |
+
shuffle: true
|
| 29 |
+
num_workers: ${num_workers}
|
| 30 |
+
validation_mode: false
|
| 31 |
+
use_hf_streaming: true
|
| 32 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 33 |
+
hf_configs:
|
| 34 |
+
- CHiME6
|
| 35 |
+
- Dipco
|
| 36 |
+
- ICSI
|
| 37 |
+
- M3SD
|
| 38 |
+
- NOTSOFAR
|
| 39 |
+
- aishell4
|
| 40 |
+
- aishell5
|
| 41 |
+
- alimeeting
|
| 42 |
+
- ami_ihm
|
| 43 |
+
- ami_sdm
|
| 44 |
+
- callhome
|
| 45 |
+
- msdwild
|
| 46 |
+
- voxconverse
|
| 47 |
+
hf_split: train
|
| 48 |
+
shuffle_seed: 42
|
| 49 |
+
shuffle_buffer_size: 4
|
| 50 |
+
prefetch_factor: 4
|
| 51 |
+
persistent_workers: true
|
| 52 |
+
prefetch_rows: 8
|
| 53 |
+
use_lhotse: false
|
| 54 |
+
use_bucketing: true
|
| 55 |
+
num_buckets: 10
|
| 56 |
+
bucket_duration_bins:
|
| 57 |
+
- 10
|
| 58 |
+
- 20
|
| 59 |
+
- 30
|
| 60 |
+
- 40
|
| 61 |
+
- 50
|
| 62 |
+
- 60
|
| 63 |
+
- 70
|
| 64 |
+
- 80
|
| 65 |
+
- 90
|
| 66 |
+
pin_memory: true
|
| 67 |
+
min_duration: 10
|
| 68 |
+
max_duration: 90
|
| 69 |
+
batch_duration: 400
|
| 70 |
+
quadratic_duration: 1200
|
| 71 |
+
bucket_buffer_size: 20000
|
| 72 |
+
window_stride: 0.04
|
| 73 |
+
subsampling_factor: 1
|
| 74 |
+
validation_ds:
|
| 75 |
+
manifest_filepath: null
|
| 76 |
+
is_tarred: false
|
| 77 |
+
tarred_audio_filepaths: null
|
| 78 |
+
sample_rate: ${model.sample_rate}
|
| 79 |
+
num_spks: ${model.max_num_of_spks}
|
| 80 |
+
session_len_sec: 45
|
| 81 |
+
shift_sec: 4
|
| 82 |
+
soft_label_thres: 0.5
|
| 83 |
+
soft_targets: false
|
| 84 |
+
labels: null
|
| 85 |
+
batch_size: ${batch_size}
|
| 86 |
+
shuffle: false
|
| 87 |
+
num_workers: ${num_workers}
|
| 88 |
+
validation_mode: true
|
| 89 |
+
use_hf_streaming: true
|
| 90 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 91 |
+
hf_configs:
|
| 92 |
+
- alm_benchmark
|
| 93 |
+
hf_split: train
|
| 94 |
+
shuffle_seed: 42
|
| 95 |
+
shuffle_buffer_size: 100
|
| 96 |
+
prefetch_factor: 4
|
| 97 |
+
persistent_workers: true
|
| 98 |
+
prefetch_rows: 4
|
| 99 |
+
use_lhotse: false
|
| 100 |
+
use_bucketing: false
|
| 101 |
+
drop_last: false
|
| 102 |
+
pin_memory: true
|
| 103 |
+
window_stride: 0.04
|
| 104 |
+
subsampling_factor: 1
|
| 105 |
+
test_ds:
|
| 106 |
+
manifest_filepath: null
|
| 107 |
+
is_tarred: false
|
| 108 |
+
tarred_audio_filepaths: null
|
| 109 |
+
sample_rate: ${model.sample_rate}
|
| 110 |
+
num_spks: ${model.max_num_of_spks}
|
| 111 |
+
session_len_sec: 90
|
| 112 |
+
soft_label_thres: 0.5
|
| 113 |
+
soft_targets: false
|
| 114 |
+
labels: null
|
| 115 |
+
batch_size: ${batch_size}
|
| 116 |
+
shuffle: false
|
| 117 |
+
seq_eval_mode: true
|
| 118 |
+
num_workers: ${num_workers}
|
| 119 |
+
validation_mode: true
|
| 120 |
+
use_lhotse: false
|
| 121 |
+
use_bucketing: false
|
| 122 |
+
drop_last: false
|
| 123 |
+
pin_memory: true
|
| 124 |
+
window_stride: 0.04
|
| 125 |
+
subsampling_factor: 1
|
| 126 |
+
sortformer_modules:
|
| 127 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 128 |
+
num_spks: ${model.max_num_of_spks}
|
| 129 |
+
dropout_rate: 0.1
|
| 130 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 131 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
encoder:
|
| 134 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 135 |
+
subsampling_factor: 1
|
| 136 |
+
transformer_encoder:
|
| 137 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 138 |
+
num_layers: 18
|
| 139 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 140 |
+
inner_size: 768
|
| 141 |
+
num_attention_heads: 8
|
| 142 |
+
attn_score_dropout: 0.5
|
| 143 |
+
attn_layer_dropout: 0.5
|
| 144 |
+
ffn_dropout: 0.5
|
| 145 |
+
hidden_act: relu
|
| 146 |
+
pre_ln: false
|
| 147 |
+
pre_ln_final_layer_norm: true
|
| 148 |
+
loss:
|
| 149 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 150 |
+
weight: null
|
| 151 |
+
reduction: mean
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
optim:
|
| 154 |
+
name: adamw
|
| 155 |
+
lr: ${model.lr}
|
| 156 |
+
betas:
|
| 157 |
+
- 0.9
|
| 158 |
+
- 0.98
|
| 159 |
+
weight_decay: 0.001
|
| 160 |
+
sched:
|
| 161 |
+
name: InverseSquareRootAnnealing
|
| 162 |
+
warmup_steps: 2500
|
| 163 |
+
warmup_ratio: null
|
| 164 |
+
min_lr: 1.0e-06
|
| 165 |
+
trainer:
|
| 166 |
+
devices: 8
|
| 167 |
+
accelerator: gpu
|
| 168 |
+
precision: bf16-mixed
|
| 169 |
+
max_epochs: -1
|
| 170 |
+
max_steps: 16000
|
| 171 |
+
num_nodes: 1
|
| 172 |
+
strategy: ddp_find_unused_parameters_true
|
| 173 |
+
accumulate_grad_batches: 1
|
| 174 |
+
deterministic: false
|
| 175 |
+
enable_checkpointing: false
|
| 176 |
+
logger: false
|
| 177 |
+
log_every_n_steps: 1
|
| 178 |
+
val_check_interval: 2000
|
| 179 |
+
num_sanity_val_steps: 0
|
| 180 |
+
exp_manager:
|
| 181 |
+
use_datetime_version: false
|
| 182 |
+
exp_dir: ./peav_sortformer_train
|
| 183 |
+
name: peav_sortformer_train
|
| 184 |
+
resume_if_exists: true
|
| 185 |
+
resume_from_checkpoint: null
|
| 186 |
+
resume_ignore_no_checkpoint: true
|
| 187 |
+
create_tensorboard_logger: true
|
| 188 |
+
create_checkpoint_callback: true
|
| 189 |
+
create_wandb_logger: false
|
| 190 |
+
checkpoint_callback_params:
|
| 191 |
+
monitor: val_der
|
| 192 |
+
mode: min
|
| 193 |
+
save_top_k: 5
|
| 194 |
+
every_n_train_steps: 2000
|
| 195 |
+
every_n_epochs: 0
|
| 196 |
+
wandb_logger_kwargs:
|
| 197 |
+
resume: true
|
| 198 |
+
name: null
|
| 199 |
+
project: null
|
| 200 |
+
|
| 201 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 202 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 203 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
|
| 204 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
|
| 205 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 206 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 207 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 208 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo W 2026-04-13 11:11:44 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 210 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
|
| 211 |
+
[NeMo I 2026-04-13 11:12:10 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
|
| 212 |
+
[NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
|
| 213 |
+
Parameter Group 0
|
| 214 |
+
amsgrad: False
|
| 215 |
+
betas: (0.9, 0.98)
|
| 216 |
+
capturable: False
|
| 217 |
+
decoupled_weight_decay: True
|
| 218 |
+
differentiable: False
|
| 219 |
+
eps: 1e-08
|
| 220 |
+
foreach: None
|
| 221 |
+
fused: None
|
| 222 |
+
lr: 2e-05
|
| 223 |
+
maximize: False
|
| 224 |
+
weight_decay: 0.001
|
| 225 |
+
)
|
| 226 |
+
[NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x79ec3de84830>"
|
| 227 |
+
will be used during training (effective maximum steps = 16000) -
|
| 228 |
+
Parameters :
|
| 229 |
+
(warmup_steps: 2500
|
| 230 |
+
warmup_ratio: null
|
| 231 |
+
min_lr: 1.0e-06
|
| 232 |
+
max_steps: 16000
|
| 233 |
+
)
|
| 234 |
+
[NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118205.
|
| 235 |
+
[NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351932.
|
| 236 |
+
[NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395537.
|
| 237 |
+
[NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7330906.
|
| 238 |
+
[NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493804.
|
| 239 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072822.
|
| 240 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771307.
|
| 241 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.213653.
|
| 242 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420345.
|
| 243 |
+
[NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374886.
|
| 244 |
+
[NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543212.
|
| 245 |
+
[NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291036.
|
| 246 |
+
[NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085367.
|
| 247 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822011.
|
| 248 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.914093.
|
peav_sortformer_train/nemo_log_globalrank-2_localrank-2.txt
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 10 |
+
peav_pretrained: true
|
| 11 |
+
peav_freeze_layers: 0
|
| 12 |
+
rttm_unit_10ms_frame_count: 4
|
| 13 |
+
der_collar: 0.25
|
| 14 |
+
der_ignore_overlap: true
|
| 15 |
+
model_defaults:
|
| 16 |
+
fc_d_model: 768
|
| 17 |
+
tf_d_model: 192
|
| 18 |
+
train_ds:
|
| 19 |
+
manifest_filepath: null
|
| 20 |
+
sample_rate: ${model.sample_rate}
|
| 21 |
+
num_spks: ${model.max_num_of_spks}
|
| 22 |
+
session_len_sec: 45
|
| 23 |
+
shift_sec: 4
|
| 24 |
+
soft_label_thres: 0.5
|
| 25 |
+
soft_targets: false
|
| 26 |
+
labels: null
|
| 27 |
+
batch_size: ${batch_size}
|
| 28 |
+
shuffle: true
|
| 29 |
+
num_workers: ${num_workers}
|
| 30 |
+
validation_mode: false
|
| 31 |
+
use_hf_streaming: true
|
| 32 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 33 |
+
hf_configs:
|
| 34 |
+
- CHiME6
|
| 35 |
+
- Dipco
|
| 36 |
+
- ICSI
|
| 37 |
+
- M3SD
|
| 38 |
+
- NOTSOFAR
|
| 39 |
+
- aishell4
|
| 40 |
+
- aishell5
|
| 41 |
+
- alimeeting
|
| 42 |
+
- ami_ihm
|
| 43 |
+
- ami_sdm
|
| 44 |
+
- callhome
|
| 45 |
+
- msdwild
|
| 46 |
+
- voxconverse
|
| 47 |
+
hf_split: train
|
| 48 |
+
shuffle_seed: 42
|
| 49 |
+
shuffle_buffer_size: 4
|
| 50 |
+
prefetch_factor: 4
|
| 51 |
+
persistent_workers: true
|
| 52 |
+
prefetch_rows: 8
|
| 53 |
+
use_lhotse: false
|
| 54 |
+
use_bucketing: true
|
| 55 |
+
num_buckets: 10
|
| 56 |
+
bucket_duration_bins:
|
| 57 |
+
- 10
|
| 58 |
+
- 20
|
| 59 |
+
- 30
|
| 60 |
+
- 40
|
| 61 |
+
- 50
|
| 62 |
+
- 60
|
| 63 |
+
- 70
|
| 64 |
+
- 80
|
| 65 |
+
- 90
|
| 66 |
+
pin_memory: true
|
| 67 |
+
min_duration: 10
|
| 68 |
+
max_duration: 90
|
| 69 |
+
batch_duration: 400
|
| 70 |
+
quadratic_duration: 1200
|
| 71 |
+
bucket_buffer_size: 20000
|
| 72 |
+
window_stride: 0.04
|
| 73 |
+
subsampling_factor: 1
|
| 74 |
+
validation_ds:
|
| 75 |
+
manifest_filepath: null
|
| 76 |
+
is_tarred: false
|
| 77 |
+
tarred_audio_filepaths: null
|
| 78 |
+
sample_rate: ${model.sample_rate}
|
| 79 |
+
num_spks: ${model.max_num_of_spks}
|
| 80 |
+
session_len_sec: 45
|
| 81 |
+
shift_sec: 4
|
| 82 |
+
soft_label_thres: 0.5
|
| 83 |
+
soft_targets: false
|
| 84 |
+
labels: null
|
| 85 |
+
batch_size: ${batch_size}
|
| 86 |
+
shuffle: false
|
| 87 |
+
num_workers: ${num_workers}
|
| 88 |
+
validation_mode: true
|
| 89 |
+
use_hf_streaming: true
|
| 90 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 91 |
+
hf_configs:
|
| 92 |
+
- alm_benchmark
|
| 93 |
+
hf_split: train
|
| 94 |
+
shuffle_seed: 42
|
| 95 |
+
shuffle_buffer_size: 100
|
| 96 |
+
prefetch_factor: 4
|
| 97 |
+
persistent_workers: true
|
| 98 |
+
prefetch_rows: 4
|
| 99 |
+
use_lhotse: false
|
| 100 |
+
use_bucketing: false
|
| 101 |
+
drop_last: false
|
| 102 |
+
pin_memory: true
|
| 103 |
+
window_stride: 0.04
|
| 104 |
+
subsampling_factor: 1
|
| 105 |
+
test_ds:
|
| 106 |
+
manifest_filepath: null
|
| 107 |
+
is_tarred: false
|
| 108 |
+
tarred_audio_filepaths: null
|
| 109 |
+
sample_rate: ${model.sample_rate}
|
| 110 |
+
num_spks: ${model.max_num_of_spks}
|
| 111 |
+
session_len_sec: 90
|
| 112 |
+
soft_label_thres: 0.5
|
| 113 |
+
soft_targets: false
|
| 114 |
+
labels: null
|
| 115 |
+
batch_size: ${batch_size}
|
| 116 |
+
shuffle: false
|
| 117 |
+
seq_eval_mode: true
|
| 118 |
+
num_workers: ${num_workers}
|
| 119 |
+
validation_mode: true
|
| 120 |
+
use_lhotse: false
|
| 121 |
+
use_bucketing: false
|
| 122 |
+
drop_last: false
|
| 123 |
+
pin_memory: true
|
| 124 |
+
window_stride: 0.04
|
| 125 |
+
subsampling_factor: 1
|
| 126 |
+
sortformer_modules:
|
| 127 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 128 |
+
num_spks: ${model.max_num_of_spks}
|
| 129 |
+
dropout_rate: 0.1
|
| 130 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 131 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
encoder:
|
| 134 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 135 |
+
subsampling_factor: 1
|
| 136 |
+
transformer_encoder:
|
| 137 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 138 |
+
num_layers: 18
|
| 139 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 140 |
+
inner_size: 768
|
| 141 |
+
num_attention_heads: 8
|
| 142 |
+
attn_score_dropout: 0.5
|
| 143 |
+
attn_layer_dropout: 0.5
|
| 144 |
+
ffn_dropout: 0.5
|
| 145 |
+
hidden_act: relu
|
| 146 |
+
pre_ln: false
|
| 147 |
+
pre_ln_final_layer_norm: true
|
| 148 |
+
loss:
|
| 149 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 150 |
+
weight: null
|
| 151 |
+
reduction: mean
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
optim:
|
| 154 |
+
name: adamw
|
| 155 |
+
lr: ${model.lr}
|
| 156 |
+
betas:
|
| 157 |
+
- 0.9
|
| 158 |
+
- 0.98
|
| 159 |
+
weight_decay: 0.001
|
| 160 |
+
sched:
|
| 161 |
+
name: InverseSquareRootAnnealing
|
| 162 |
+
warmup_steps: 2500
|
| 163 |
+
warmup_ratio: null
|
| 164 |
+
min_lr: 1.0e-06
|
| 165 |
+
trainer:
|
| 166 |
+
devices: 8
|
| 167 |
+
accelerator: gpu
|
| 168 |
+
precision: bf16-mixed
|
| 169 |
+
max_epochs: -1
|
| 170 |
+
max_steps: 16000
|
| 171 |
+
num_nodes: 1
|
| 172 |
+
strategy: ddp_find_unused_parameters_true
|
| 173 |
+
accumulate_grad_batches: 1
|
| 174 |
+
deterministic: false
|
| 175 |
+
enable_checkpointing: false
|
| 176 |
+
logger: false
|
| 177 |
+
log_every_n_steps: 1
|
| 178 |
+
val_check_interval: 2000
|
| 179 |
+
num_sanity_val_steps: 0
|
| 180 |
+
exp_manager:
|
| 181 |
+
use_datetime_version: false
|
| 182 |
+
exp_dir: ./peav_sortformer_train
|
| 183 |
+
name: peav_sortformer_train
|
| 184 |
+
resume_if_exists: true
|
| 185 |
+
resume_from_checkpoint: null
|
| 186 |
+
resume_ignore_no_checkpoint: true
|
| 187 |
+
create_tensorboard_logger: true
|
| 188 |
+
create_checkpoint_callback: true
|
| 189 |
+
create_wandb_logger: false
|
| 190 |
+
checkpoint_callback_params:
|
| 191 |
+
monitor: val_der
|
| 192 |
+
mode: min
|
| 193 |
+
save_top_k: 5
|
| 194 |
+
every_n_train_steps: 2000
|
| 195 |
+
every_n_epochs: 0
|
| 196 |
+
wandb_logger_kwargs:
|
| 197 |
+
resume: true
|
| 198 |
+
name: null
|
| 199 |
+
project: null
|
| 200 |
+
|
| 201 |
+
[NeMo W 2026-04-13 11:11:40 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 202 |
+
[NeMo W 2026-04-13 11:11:40 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 203 |
+
[NeMo I 2026-04-13 11:11:40 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
|
| 204 |
+
[NeMo I 2026-04-13 11:11:40 exp_manager:1262] TensorboardLogger has been set up
|
| 205 |
+
[NeMo W 2026-04-13 11:11:40 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 206 |
+
[NeMo I 2026-04-13 11:11:40 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 207 |
+
[NeMo I 2026-04-13 11:11:45 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 208 |
+
[NeMo I 2026-04-13 11:11:45 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo W 2026-04-13 11:11:45 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 210 |
+
[NeMo I 2026-04-13 11:11:45 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
|
| 211 |
+
[NeMo I 2026-04-13 11:12:07 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
|
| 212 |
+
[NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
|
| 213 |
+
Parameter Group 0
|
| 214 |
+
amsgrad: False
|
| 215 |
+
betas: (0.9, 0.98)
|
| 216 |
+
capturable: False
|
| 217 |
+
decoupled_weight_decay: True
|
| 218 |
+
differentiable: False
|
| 219 |
+
eps: 1e-08
|
| 220 |
+
foreach: None
|
| 221 |
+
fused: None
|
| 222 |
+
lr: 2e-05
|
| 223 |
+
maximize: False
|
| 224 |
+
weight_decay: 0.001
|
| 225 |
+
)
|
| 226 |
+
[NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x73feafc4b740>"
|
| 227 |
+
will be used during training (effective maximum steps = 16000) -
|
| 228 |
+
Parameters :
|
| 229 |
+
(warmup_steps: 2500
|
| 230 |
+
warmup_ratio: null
|
| 231 |
+
min_lr: 1.0e-06
|
| 232 |
+
max_steps: 16000
|
| 233 |
+
)
|
| 234 |
+
[NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118193.
|
| 235 |
+
[NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351932.
|
| 236 |
+
[NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395525.
|
| 237 |
+
[NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7330956.
|
| 238 |
+
[NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493802.
|
| 239 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.307285.
|
| 240 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771292.
|
| 241 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136545.
|
| 242 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420338.
|
| 243 |
+
[NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374915.
|
| 244 |
+
[NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543217.
|
| 245 |
+
[NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291129.
|
| 246 |
+
[NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085355.
|
| 247 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822057.
|
| 248 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140923.
|
peav_sortformer_train/nemo_log_globalrank-3_localrank-3.txt
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 11:11:38 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 10 |
+
peav_pretrained: true
|
| 11 |
+
peav_freeze_layers: 0
|
| 12 |
+
rttm_unit_10ms_frame_count: 4
|
| 13 |
+
der_collar: 0.25
|
| 14 |
+
der_ignore_overlap: true
|
| 15 |
+
model_defaults:
|
| 16 |
+
fc_d_model: 768
|
| 17 |
+
tf_d_model: 192
|
| 18 |
+
train_ds:
|
| 19 |
+
manifest_filepath: null
|
| 20 |
+
sample_rate: ${model.sample_rate}
|
| 21 |
+
num_spks: ${model.max_num_of_spks}
|
| 22 |
+
session_len_sec: 45
|
| 23 |
+
shift_sec: 4
|
| 24 |
+
soft_label_thres: 0.5
|
| 25 |
+
soft_targets: false
|
| 26 |
+
labels: null
|
| 27 |
+
batch_size: ${batch_size}
|
| 28 |
+
shuffle: true
|
| 29 |
+
num_workers: ${num_workers}
|
| 30 |
+
validation_mode: false
|
| 31 |
+
use_hf_streaming: true
|
| 32 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 33 |
+
hf_configs:
|
| 34 |
+
- CHiME6
|
| 35 |
+
- Dipco
|
| 36 |
+
- ICSI
|
| 37 |
+
- M3SD
|
| 38 |
+
- NOTSOFAR
|
| 39 |
+
- aishell4
|
| 40 |
+
- aishell5
|
| 41 |
+
- alimeeting
|
| 42 |
+
- ami_ihm
|
| 43 |
+
- ami_sdm
|
| 44 |
+
- callhome
|
| 45 |
+
- msdwild
|
| 46 |
+
- voxconverse
|
| 47 |
+
hf_split: train
|
| 48 |
+
shuffle_seed: 42
|
| 49 |
+
shuffle_buffer_size: 4
|
| 50 |
+
prefetch_factor: 4
|
| 51 |
+
persistent_workers: true
|
| 52 |
+
prefetch_rows: 8
|
| 53 |
+
use_lhotse: false
|
| 54 |
+
use_bucketing: true
|
| 55 |
+
num_buckets: 10
|
| 56 |
+
bucket_duration_bins:
|
| 57 |
+
- 10
|
| 58 |
+
- 20
|
| 59 |
+
- 30
|
| 60 |
+
- 40
|
| 61 |
+
- 50
|
| 62 |
+
- 60
|
| 63 |
+
- 70
|
| 64 |
+
- 80
|
| 65 |
+
- 90
|
| 66 |
+
pin_memory: true
|
| 67 |
+
min_duration: 10
|
| 68 |
+
max_duration: 90
|
| 69 |
+
batch_duration: 400
|
| 70 |
+
quadratic_duration: 1200
|
| 71 |
+
bucket_buffer_size: 20000
|
| 72 |
+
window_stride: 0.04
|
| 73 |
+
subsampling_factor: 1
|
| 74 |
+
validation_ds:
|
| 75 |
+
manifest_filepath: null
|
| 76 |
+
is_tarred: false
|
| 77 |
+
tarred_audio_filepaths: null
|
| 78 |
+
sample_rate: ${model.sample_rate}
|
| 79 |
+
num_spks: ${model.max_num_of_spks}
|
| 80 |
+
session_len_sec: 45
|
| 81 |
+
shift_sec: 4
|
| 82 |
+
soft_label_thres: 0.5
|
| 83 |
+
soft_targets: false
|
| 84 |
+
labels: null
|
| 85 |
+
batch_size: ${batch_size}
|
| 86 |
+
shuffle: false
|
| 87 |
+
num_workers: ${num_workers}
|
| 88 |
+
validation_mode: true
|
| 89 |
+
use_hf_streaming: true
|
| 90 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 91 |
+
hf_configs:
|
| 92 |
+
- alm_benchmark
|
| 93 |
+
hf_split: train
|
| 94 |
+
shuffle_seed: 42
|
| 95 |
+
shuffle_buffer_size: 100
|
| 96 |
+
prefetch_factor: 4
|
| 97 |
+
persistent_workers: true
|
| 98 |
+
prefetch_rows: 4
|
| 99 |
+
use_lhotse: false
|
| 100 |
+
use_bucketing: false
|
| 101 |
+
drop_last: false
|
| 102 |
+
pin_memory: true
|
| 103 |
+
window_stride: 0.04
|
| 104 |
+
subsampling_factor: 1
|
| 105 |
+
test_ds:
|
| 106 |
+
manifest_filepath: null
|
| 107 |
+
is_tarred: false
|
| 108 |
+
tarred_audio_filepaths: null
|
| 109 |
+
sample_rate: ${model.sample_rate}
|
| 110 |
+
num_spks: ${model.max_num_of_spks}
|
| 111 |
+
session_len_sec: 90
|
| 112 |
+
soft_label_thres: 0.5
|
| 113 |
+
soft_targets: false
|
| 114 |
+
labels: null
|
| 115 |
+
batch_size: ${batch_size}
|
| 116 |
+
shuffle: false
|
| 117 |
+
seq_eval_mode: true
|
| 118 |
+
num_workers: ${num_workers}
|
| 119 |
+
validation_mode: true
|
| 120 |
+
use_lhotse: false
|
| 121 |
+
use_bucketing: false
|
| 122 |
+
drop_last: false
|
| 123 |
+
pin_memory: true
|
| 124 |
+
window_stride: 0.04
|
| 125 |
+
subsampling_factor: 1
|
| 126 |
+
sortformer_modules:
|
| 127 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 128 |
+
num_spks: ${model.max_num_of_spks}
|
| 129 |
+
dropout_rate: 0.1
|
| 130 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 131 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
encoder:
|
| 134 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 135 |
+
subsampling_factor: 1
|
| 136 |
+
transformer_encoder:
|
| 137 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 138 |
+
num_layers: 18
|
| 139 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 140 |
+
inner_size: 768
|
| 141 |
+
num_attention_heads: 8
|
| 142 |
+
attn_score_dropout: 0.5
|
| 143 |
+
attn_layer_dropout: 0.5
|
| 144 |
+
ffn_dropout: 0.5
|
| 145 |
+
hidden_act: relu
|
| 146 |
+
pre_ln: false
|
| 147 |
+
pre_ln_final_layer_norm: true
|
| 148 |
+
loss:
|
| 149 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 150 |
+
weight: null
|
| 151 |
+
reduction: mean
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
optim:
|
| 154 |
+
name: adamw
|
| 155 |
+
lr: ${model.lr}
|
| 156 |
+
betas:
|
| 157 |
+
- 0.9
|
| 158 |
+
- 0.98
|
| 159 |
+
weight_decay: 0.001
|
| 160 |
+
sched:
|
| 161 |
+
name: InverseSquareRootAnnealing
|
| 162 |
+
warmup_steps: 2500
|
| 163 |
+
warmup_ratio: null
|
| 164 |
+
min_lr: 1.0e-06
|
| 165 |
+
trainer:
|
| 166 |
+
devices: 8
|
| 167 |
+
accelerator: gpu
|
| 168 |
+
precision: bf16-mixed
|
| 169 |
+
max_epochs: -1
|
| 170 |
+
max_steps: 16000
|
| 171 |
+
num_nodes: 1
|
| 172 |
+
strategy: ddp_find_unused_parameters_true
|
| 173 |
+
accumulate_grad_batches: 1
|
| 174 |
+
deterministic: false
|
| 175 |
+
enable_checkpointing: false
|
| 176 |
+
logger: false
|
| 177 |
+
log_every_n_steps: 1
|
| 178 |
+
val_check_interval: 2000
|
| 179 |
+
num_sanity_val_steps: 0
|
| 180 |
+
exp_manager:
|
| 181 |
+
use_datetime_version: false
|
| 182 |
+
exp_dir: ./peav_sortformer_train
|
| 183 |
+
name: peav_sortformer_train
|
| 184 |
+
resume_if_exists: true
|
| 185 |
+
resume_from_checkpoint: null
|
| 186 |
+
resume_ignore_no_checkpoint: true
|
| 187 |
+
create_tensorboard_logger: true
|
| 188 |
+
create_checkpoint_callback: true
|
| 189 |
+
create_wandb_logger: false
|
| 190 |
+
checkpoint_callback_params:
|
| 191 |
+
monitor: val_der
|
| 192 |
+
mode: min
|
| 193 |
+
save_top_k: 5
|
| 194 |
+
every_n_train_steps: 2000
|
| 195 |
+
every_n_epochs: 0
|
| 196 |
+
wandb_logger_kwargs:
|
| 197 |
+
resume: true
|
| 198 |
+
name: null
|
| 199 |
+
project: null
|
| 200 |
+
|
| 201 |
+
[NeMo W 2026-04-13 11:11:38 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 202 |
+
[NeMo W 2026-04-13 11:11:38 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 203 |
+
[NeMo I 2026-04-13 11:11:38 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
|
| 204 |
+
[NeMo I 2026-04-13 11:11:38 exp_manager:1262] TensorboardLogger has been set up
|
| 205 |
+
[NeMo W 2026-04-13 11:11:38 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 206 |
+
[NeMo I 2026-04-13 11:11:38 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 207 |
+
[NeMo I 2026-04-13 11:11:43 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 208 |
+
[NeMo I 2026-04-13 11:11:43 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo W 2026-04-13 11:11:43 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 210 |
+
[NeMo I 2026-04-13 11:11:43 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
|
| 211 |
+
[NeMo I 2026-04-13 11:11:53 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
|
| 212 |
+
[NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
|
| 213 |
+
Parameter Group 0
|
| 214 |
+
amsgrad: False
|
| 215 |
+
betas: (0.9, 0.98)
|
| 216 |
+
capturable: False
|
| 217 |
+
decoupled_weight_decay: True
|
| 218 |
+
differentiable: False
|
| 219 |
+
eps: 1e-08
|
| 220 |
+
foreach: None
|
| 221 |
+
fused: None
|
| 222 |
+
lr: 2e-05
|
| 223 |
+
maximize: False
|
| 224 |
+
weight_decay: 0.001
|
| 225 |
+
)
|
| 226 |
+
[NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x713d95e20740>"
|
| 227 |
+
will be used during training (effective maximum steps = 16000) -
|
| 228 |
+
Parameters :
|
| 229 |
+
(warmup_steps: 2500
|
| 230 |
+
warmup_ratio: null
|
| 231 |
+
min_lr: 1.0e-06
|
| 232 |
+
max_steps: 16000
|
| 233 |
+
)
|
| 234 |
+
[NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118226.
|
| 235 |
+
[NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351937.
|
| 236 |
+
[NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395597.
|
| 237 |
+
[NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.733098.
|
| 238 |
+
[NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493806.
|
| 239 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072832.
|
| 240 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.977131.
|
| 241 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136538.
|
| 242 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420352.
|
| 243 |
+
[NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374905.
|
| 244 |
+
[NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543234.
|
| 245 |
+
[NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291148.
|
| 246 |
+
[NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.408538.
|
| 247 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.182203.
|
| 248 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140947.
|
peav_sortformer_train/nemo_log_globalrank-4_localrank-4.txt
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 10 |
+
peav_pretrained: true
|
| 11 |
+
peav_freeze_layers: 0
|
| 12 |
+
rttm_unit_10ms_frame_count: 4
|
| 13 |
+
der_collar: 0.25
|
| 14 |
+
der_ignore_overlap: true
|
| 15 |
+
model_defaults:
|
| 16 |
+
fc_d_model: 768
|
| 17 |
+
tf_d_model: 192
|
| 18 |
+
train_ds:
|
| 19 |
+
manifest_filepath: null
|
| 20 |
+
sample_rate: ${model.sample_rate}
|
| 21 |
+
num_spks: ${model.max_num_of_spks}
|
| 22 |
+
session_len_sec: 45
|
| 23 |
+
shift_sec: 4
|
| 24 |
+
soft_label_thres: 0.5
|
| 25 |
+
soft_targets: false
|
| 26 |
+
labels: null
|
| 27 |
+
batch_size: ${batch_size}
|
| 28 |
+
shuffle: true
|
| 29 |
+
num_workers: ${num_workers}
|
| 30 |
+
validation_mode: false
|
| 31 |
+
use_hf_streaming: true
|
| 32 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 33 |
+
hf_configs:
|
| 34 |
+
- CHiME6
|
| 35 |
+
- Dipco
|
| 36 |
+
- ICSI
|
| 37 |
+
- M3SD
|
| 38 |
+
- NOTSOFAR
|
| 39 |
+
- aishell4
|
| 40 |
+
- aishell5
|
| 41 |
+
- alimeeting
|
| 42 |
+
- ami_ihm
|
| 43 |
+
- ami_sdm
|
| 44 |
+
- callhome
|
| 45 |
+
- msdwild
|
| 46 |
+
- voxconverse
|
| 47 |
+
hf_split: train
|
| 48 |
+
shuffle_seed: 42
|
| 49 |
+
shuffle_buffer_size: 4
|
| 50 |
+
prefetch_factor: 4
|
| 51 |
+
persistent_workers: true
|
| 52 |
+
prefetch_rows: 8
|
| 53 |
+
use_lhotse: false
|
| 54 |
+
use_bucketing: true
|
| 55 |
+
num_buckets: 10
|
| 56 |
+
bucket_duration_bins:
|
| 57 |
+
- 10
|
| 58 |
+
- 20
|
| 59 |
+
- 30
|
| 60 |
+
- 40
|
| 61 |
+
- 50
|
| 62 |
+
- 60
|
| 63 |
+
- 70
|
| 64 |
+
- 80
|
| 65 |
+
- 90
|
| 66 |
+
pin_memory: true
|
| 67 |
+
min_duration: 10
|
| 68 |
+
max_duration: 90
|
| 69 |
+
batch_duration: 400
|
| 70 |
+
quadratic_duration: 1200
|
| 71 |
+
bucket_buffer_size: 20000
|
| 72 |
+
window_stride: 0.04
|
| 73 |
+
subsampling_factor: 1
|
| 74 |
+
validation_ds:
|
| 75 |
+
manifest_filepath: null
|
| 76 |
+
is_tarred: false
|
| 77 |
+
tarred_audio_filepaths: null
|
| 78 |
+
sample_rate: ${model.sample_rate}
|
| 79 |
+
num_spks: ${model.max_num_of_spks}
|
| 80 |
+
session_len_sec: 45
|
| 81 |
+
shift_sec: 4
|
| 82 |
+
soft_label_thres: 0.5
|
| 83 |
+
soft_targets: false
|
| 84 |
+
labels: null
|
| 85 |
+
batch_size: ${batch_size}
|
| 86 |
+
shuffle: false
|
| 87 |
+
num_workers: ${num_workers}
|
| 88 |
+
validation_mode: true
|
| 89 |
+
use_hf_streaming: true
|
| 90 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 91 |
+
hf_configs:
|
| 92 |
+
- alm_benchmark
|
| 93 |
+
hf_split: train
|
| 94 |
+
shuffle_seed: 42
|
| 95 |
+
shuffle_buffer_size: 100
|
| 96 |
+
prefetch_factor: 4
|
| 97 |
+
persistent_workers: true
|
| 98 |
+
prefetch_rows: 4
|
| 99 |
+
use_lhotse: false
|
| 100 |
+
use_bucketing: false
|
| 101 |
+
drop_last: false
|
| 102 |
+
pin_memory: true
|
| 103 |
+
window_stride: 0.04
|
| 104 |
+
subsampling_factor: 1
|
| 105 |
+
test_ds:
|
| 106 |
+
manifest_filepath: null
|
| 107 |
+
is_tarred: false
|
| 108 |
+
tarred_audio_filepaths: null
|
| 109 |
+
sample_rate: ${model.sample_rate}
|
| 110 |
+
num_spks: ${model.max_num_of_spks}
|
| 111 |
+
session_len_sec: 90
|
| 112 |
+
soft_label_thres: 0.5
|
| 113 |
+
soft_targets: false
|
| 114 |
+
labels: null
|
| 115 |
+
batch_size: ${batch_size}
|
| 116 |
+
shuffle: false
|
| 117 |
+
seq_eval_mode: true
|
| 118 |
+
num_workers: ${num_workers}
|
| 119 |
+
validation_mode: true
|
| 120 |
+
use_lhotse: false
|
| 121 |
+
use_bucketing: false
|
| 122 |
+
drop_last: false
|
| 123 |
+
pin_memory: true
|
| 124 |
+
window_stride: 0.04
|
| 125 |
+
subsampling_factor: 1
|
| 126 |
+
sortformer_modules:
|
| 127 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 128 |
+
num_spks: ${model.max_num_of_spks}
|
| 129 |
+
dropout_rate: 0.1
|
| 130 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 131 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
encoder:
|
| 134 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 135 |
+
subsampling_factor: 1
|
| 136 |
+
transformer_encoder:
|
| 137 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 138 |
+
num_layers: 18
|
| 139 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 140 |
+
inner_size: 768
|
| 141 |
+
num_attention_heads: 8
|
| 142 |
+
attn_score_dropout: 0.5
|
| 143 |
+
attn_layer_dropout: 0.5
|
| 144 |
+
ffn_dropout: 0.5
|
| 145 |
+
hidden_act: relu
|
| 146 |
+
pre_ln: false
|
| 147 |
+
pre_ln_final_layer_norm: true
|
| 148 |
+
loss:
|
| 149 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 150 |
+
weight: null
|
| 151 |
+
reduction: mean
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
optim:
|
| 154 |
+
name: adamw
|
| 155 |
+
lr: ${model.lr}
|
| 156 |
+
betas:
|
| 157 |
+
- 0.9
|
| 158 |
+
- 0.98
|
| 159 |
+
weight_decay: 0.001
|
| 160 |
+
sched:
|
| 161 |
+
name: InverseSquareRootAnnealing
|
| 162 |
+
warmup_steps: 2500
|
| 163 |
+
warmup_ratio: null
|
| 164 |
+
min_lr: 1.0e-06
|
| 165 |
+
trainer:
|
| 166 |
+
devices: 8
|
| 167 |
+
accelerator: gpu
|
| 168 |
+
precision: bf16-mixed
|
| 169 |
+
max_epochs: -1
|
| 170 |
+
max_steps: 16000
|
| 171 |
+
num_nodes: 1
|
| 172 |
+
strategy: ddp_find_unused_parameters_true
|
| 173 |
+
accumulate_grad_batches: 1
|
| 174 |
+
deterministic: false
|
| 175 |
+
enable_checkpointing: false
|
| 176 |
+
logger: false
|
| 177 |
+
log_every_n_steps: 1
|
| 178 |
+
val_check_interval: 2000
|
| 179 |
+
num_sanity_val_steps: 0
|
| 180 |
+
exp_manager:
|
| 181 |
+
use_datetime_version: false
|
| 182 |
+
exp_dir: ./peav_sortformer_train
|
| 183 |
+
name: peav_sortformer_train
|
| 184 |
+
resume_if_exists: true
|
| 185 |
+
resume_from_checkpoint: null
|
| 186 |
+
resume_ignore_no_checkpoint: true
|
| 187 |
+
create_tensorboard_logger: true
|
| 188 |
+
create_checkpoint_callback: true
|
| 189 |
+
create_wandb_logger: false
|
| 190 |
+
checkpoint_callback_params:
|
| 191 |
+
monitor: val_der
|
| 192 |
+
mode: min
|
| 193 |
+
save_top_k: 5
|
| 194 |
+
every_n_train_steps: 2000
|
| 195 |
+
every_n_epochs: 0
|
| 196 |
+
wandb_logger_kwargs:
|
| 197 |
+
resume: true
|
| 198 |
+
name: null
|
| 199 |
+
project: null
|
| 200 |
+
|
| 201 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 202 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 203 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
|
| 204 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
|
| 205 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 206 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 207 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 208 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo W 2026-04-13 11:11:45 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 210 |
+
[NeMo I 2026-04-13 11:11:45 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
|
| 211 |
+
[NeMo I 2026-04-13 11:12:06 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
|
| 212 |
+
[NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
|
| 213 |
+
Parameter Group 0
|
| 214 |
+
amsgrad: False
|
| 215 |
+
betas: (0.9, 0.98)
|
| 216 |
+
capturable: False
|
| 217 |
+
decoupled_weight_decay: True
|
| 218 |
+
differentiable: False
|
| 219 |
+
eps: 1e-08
|
| 220 |
+
foreach: None
|
| 221 |
+
fused: None
|
| 222 |
+
lr: 2e-05
|
| 223 |
+
maximize: False
|
| 224 |
+
weight_decay: 0.001
|
| 225 |
+
)
|
| 226 |
+
[NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x7c7c4a659460>"
|
| 227 |
+
will be used during training (effective maximum steps = 16000) -
|
| 228 |
+
Parameters :
|
| 229 |
+
(warmup_steps: 2500
|
| 230 |
+
warmup_ratio: null
|
| 231 |
+
min_lr: 1.0e-06
|
| 232 |
+
max_steps: 16000
|
| 233 |
+
)
|
| 234 |
+
[NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118166.
|
| 235 |
+
[NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351925.
|
| 236 |
+
[NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395554.
|
| 237 |
+
[NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.733096.
|
| 238 |
+
[NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493816.
|
| 239 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072834.
|
| 240 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771283.
|
| 241 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136545.
|
| 242 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420424.
|
| 243 |
+
[NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374896.
|
| 244 |
+
[NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543217.
|
| 245 |
+
[NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291138.
|
| 246 |
+
[NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085386.
|
| 247 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822033.
|
| 248 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140968.
|
peav_sortformer_train/nemo_log_globalrank-5_localrank-5.txt
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 10 |
+
peav_pretrained: true
|
| 11 |
+
peav_freeze_layers: 0
|
| 12 |
+
rttm_unit_10ms_frame_count: 4
|
| 13 |
+
der_collar: 0.25
|
| 14 |
+
der_ignore_overlap: true
|
| 15 |
+
model_defaults:
|
| 16 |
+
fc_d_model: 768
|
| 17 |
+
tf_d_model: 192
|
| 18 |
+
train_ds:
|
| 19 |
+
manifest_filepath: null
|
| 20 |
+
sample_rate: ${model.sample_rate}
|
| 21 |
+
num_spks: ${model.max_num_of_spks}
|
| 22 |
+
session_len_sec: 45
|
| 23 |
+
shift_sec: 4
|
| 24 |
+
soft_label_thres: 0.5
|
| 25 |
+
soft_targets: false
|
| 26 |
+
labels: null
|
| 27 |
+
batch_size: ${batch_size}
|
| 28 |
+
shuffle: true
|
| 29 |
+
num_workers: ${num_workers}
|
| 30 |
+
validation_mode: false
|
| 31 |
+
use_hf_streaming: true
|
| 32 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 33 |
+
hf_configs:
|
| 34 |
+
- CHiME6
|
| 35 |
+
- Dipco
|
| 36 |
+
- ICSI
|
| 37 |
+
- M3SD
|
| 38 |
+
- NOTSOFAR
|
| 39 |
+
- aishell4
|
| 40 |
+
- aishell5
|
| 41 |
+
- alimeeting
|
| 42 |
+
- ami_ihm
|
| 43 |
+
- ami_sdm
|
| 44 |
+
- callhome
|
| 45 |
+
- msdwild
|
| 46 |
+
- voxconverse
|
| 47 |
+
hf_split: train
|
| 48 |
+
shuffle_seed: 42
|
| 49 |
+
shuffle_buffer_size: 4
|
| 50 |
+
prefetch_factor: 4
|
| 51 |
+
persistent_workers: true
|
| 52 |
+
prefetch_rows: 8
|
| 53 |
+
use_lhotse: false
|
| 54 |
+
use_bucketing: true
|
| 55 |
+
num_buckets: 10
|
| 56 |
+
bucket_duration_bins:
|
| 57 |
+
- 10
|
| 58 |
+
- 20
|
| 59 |
+
- 30
|
| 60 |
+
- 40
|
| 61 |
+
- 50
|
| 62 |
+
- 60
|
| 63 |
+
- 70
|
| 64 |
+
- 80
|
| 65 |
+
- 90
|
| 66 |
+
pin_memory: true
|
| 67 |
+
min_duration: 10
|
| 68 |
+
max_duration: 90
|
| 69 |
+
batch_duration: 400
|
| 70 |
+
quadratic_duration: 1200
|
| 71 |
+
bucket_buffer_size: 20000
|
| 72 |
+
window_stride: 0.04
|
| 73 |
+
subsampling_factor: 1
|
| 74 |
+
validation_ds:
|
| 75 |
+
manifest_filepath: null
|
| 76 |
+
is_tarred: false
|
| 77 |
+
tarred_audio_filepaths: null
|
| 78 |
+
sample_rate: ${model.sample_rate}
|
| 79 |
+
num_spks: ${model.max_num_of_spks}
|
| 80 |
+
session_len_sec: 45
|
| 81 |
+
shift_sec: 4
|
| 82 |
+
soft_label_thres: 0.5
|
| 83 |
+
soft_targets: false
|
| 84 |
+
labels: null
|
| 85 |
+
batch_size: ${batch_size}
|
| 86 |
+
shuffle: false
|
| 87 |
+
num_workers: ${num_workers}
|
| 88 |
+
validation_mode: true
|
| 89 |
+
use_hf_streaming: true
|
| 90 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 91 |
+
hf_configs:
|
| 92 |
+
- alm_benchmark
|
| 93 |
+
hf_split: train
|
| 94 |
+
shuffle_seed: 42
|
| 95 |
+
shuffle_buffer_size: 100
|
| 96 |
+
prefetch_factor: 4
|
| 97 |
+
persistent_workers: true
|
| 98 |
+
prefetch_rows: 4
|
| 99 |
+
use_lhotse: false
|
| 100 |
+
use_bucketing: false
|
| 101 |
+
drop_last: false
|
| 102 |
+
pin_memory: true
|
| 103 |
+
window_stride: 0.04
|
| 104 |
+
subsampling_factor: 1
|
| 105 |
+
test_ds:
|
| 106 |
+
manifest_filepath: null
|
| 107 |
+
is_tarred: false
|
| 108 |
+
tarred_audio_filepaths: null
|
| 109 |
+
sample_rate: ${model.sample_rate}
|
| 110 |
+
num_spks: ${model.max_num_of_spks}
|
| 111 |
+
session_len_sec: 90
|
| 112 |
+
soft_label_thres: 0.5
|
| 113 |
+
soft_targets: false
|
| 114 |
+
labels: null
|
| 115 |
+
batch_size: ${batch_size}
|
| 116 |
+
shuffle: false
|
| 117 |
+
seq_eval_mode: true
|
| 118 |
+
num_workers: ${num_workers}
|
| 119 |
+
validation_mode: true
|
| 120 |
+
use_lhotse: false
|
| 121 |
+
use_bucketing: false
|
| 122 |
+
drop_last: false
|
| 123 |
+
pin_memory: true
|
| 124 |
+
window_stride: 0.04
|
| 125 |
+
subsampling_factor: 1
|
| 126 |
+
sortformer_modules:
|
| 127 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 128 |
+
num_spks: ${model.max_num_of_spks}
|
| 129 |
+
dropout_rate: 0.1
|
| 130 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 131 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
encoder:
|
| 134 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 135 |
+
subsampling_factor: 1
|
| 136 |
+
transformer_encoder:
|
| 137 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 138 |
+
num_layers: 18
|
| 139 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 140 |
+
inner_size: 768
|
| 141 |
+
num_attention_heads: 8
|
| 142 |
+
attn_score_dropout: 0.5
|
| 143 |
+
attn_layer_dropout: 0.5
|
| 144 |
+
ffn_dropout: 0.5
|
| 145 |
+
hidden_act: relu
|
| 146 |
+
pre_ln: false
|
| 147 |
+
pre_ln_final_layer_norm: true
|
| 148 |
+
loss:
|
| 149 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 150 |
+
weight: null
|
| 151 |
+
reduction: mean
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
optim:
|
| 154 |
+
name: adamw
|
| 155 |
+
lr: ${model.lr}
|
| 156 |
+
betas:
|
| 157 |
+
- 0.9
|
| 158 |
+
- 0.98
|
| 159 |
+
weight_decay: 0.001
|
| 160 |
+
sched:
|
| 161 |
+
name: InverseSquareRootAnnealing
|
| 162 |
+
warmup_steps: 2500
|
| 163 |
+
warmup_ratio: null
|
| 164 |
+
min_lr: 1.0e-06
|
| 165 |
+
trainer:
|
| 166 |
+
devices: 8
|
| 167 |
+
accelerator: gpu
|
| 168 |
+
precision: bf16-mixed
|
| 169 |
+
max_epochs: -1
|
| 170 |
+
max_steps: 16000
|
| 171 |
+
num_nodes: 1
|
| 172 |
+
strategy: ddp_find_unused_parameters_true
|
| 173 |
+
accumulate_grad_batches: 1
|
| 174 |
+
deterministic: false
|
| 175 |
+
enable_checkpointing: false
|
| 176 |
+
logger: false
|
| 177 |
+
log_every_n_steps: 1
|
| 178 |
+
val_check_interval: 2000
|
| 179 |
+
num_sanity_val_steps: 0
|
| 180 |
+
exp_manager:
|
| 181 |
+
use_datetime_version: false
|
| 182 |
+
exp_dir: ./peav_sortformer_train
|
| 183 |
+
name: peav_sortformer_train
|
| 184 |
+
resume_if_exists: true
|
| 185 |
+
resume_from_checkpoint: null
|
| 186 |
+
resume_ignore_no_checkpoint: true
|
| 187 |
+
create_tensorboard_logger: true
|
| 188 |
+
create_checkpoint_callback: true
|
| 189 |
+
create_wandb_logger: false
|
| 190 |
+
checkpoint_callback_params:
|
| 191 |
+
monitor: val_der
|
| 192 |
+
mode: min
|
| 193 |
+
save_top_k: 5
|
| 194 |
+
every_n_train_steps: 2000
|
| 195 |
+
every_n_epochs: 0
|
| 196 |
+
wandb_logger_kwargs:
|
| 197 |
+
resume: true
|
| 198 |
+
name: null
|
| 199 |
+
project: null
|
| 200 |
+
|
| 201 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 202 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 203 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
|
| 204 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
|
| 205 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 206 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 207 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 208 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo W 2026-04-13 11:11:44 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 210 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
|
| 211 |
+
[NeMo I 2026-04-13 11:12:11 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
|
| 212 |
+
[NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
|
| 213 |
+
Parameter Group 0
|
| 214 |
+
amsgrad: False
|
| 215 |
+
betas: (0.9, 0.98)
|
| 216 |
+
capturable: False
|
| 217 |
+
decoupled_weight_decay: True
|
| 218 |
+
differentiable: False
|
| 219 |
+
eps: 1e-08
|
| 220 |
+
foreach: None
|
| 221 |
+
fused: None
|
| 222 |
+
lr: 2e-05
|
| 223 |
+
maximize: False
|
| 224 |
+
weight_decay: 0.001
|
| 225 |
+
)
|
| 226 |
+
[NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x703b76086360>"
|
| 227 |
+
will be used during training (effective maximum steps = 16000) -
|
| 228 |
+
Parameters :
|
| 229 |
+
(warmup_steps: 2500
|
| 230 |
+
warmup_ratio: null
|
| 231 |
+
min_lr: 1.0e-06
|
| 232 |
+
max_steps: 16000
|
| 233 |
+
)
|
| 234 |
+
[NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118183.
|
| 235 |
+
[NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351942.
|
| 236 |
+
[NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.239556.
|
| 237 |
+
[NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7330983.
|
| 238 |
+
[NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.449381.
|
| 239 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072872.
|
| 240 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771423.
|
| 241 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136571.
|
| 242 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420366.
|
| 243 |
+
[NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.637491.
|
| 244 |
+
[NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543243.
|
| 245 |
+
[NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291174.
|
| 246 |
+
[NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085383.
|
| 247 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822045.
|
| 248 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140959.
|
peav_sortformer_train/nemo_log_globalrank-6_localrank-6.txt
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 10 |
+
peav_pretrained: true
|
| 11 |
+
peav_freeze_layers: 0
|
| 12 |
+
rttm_unit_10ms_frame_count: 4
|
| 13 |
+
der_collar: 0.25
|
| 14 |
+
der_ignore_overlap: true
|
| 15 |
+
model_defaults:
|
| 16 |
+
fc_d_model: 768
|
| 17 |
+
tf_d_model: 192
|
| 18 |
+
train_ds:
|
| 19 |
+
manifest_filepath: null
|
| 20 |
+
sample_rate: ${model.sample_rate}
|
| 21 |
+
num_spks: ${model.max_num_of_spks}
|
| 22 |
+
session_len_sec: 45
|
| 23 |
+
shift_sec: 4
|
| 24 |
+
soft_label_thres: 0.5
|
| 25 |
+
soft_targets: false
|
| 26 |
+
labels: null
|
| 27 |
+
batch_size: ${batch_size}
|
| 28 |
+
shuffle: true
|
| 29 |
+
num_workers: ${num_workers}
|
| 30 |
+
validation_mode: false
|
| 31 |
+
use_hf_streaming: true
|
| 32 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 33 |
+
hf_configs:
|
| 34 |
+
- CHiME6
|
| 35 |
+
- Dipco
|
| 36 |
+
- ICSI
|
| 37 |
+
- M3SD
|
| 38 |
+
- NOTSOFAR
|
| 39 |
+
- aishell4
|
| 40 |
+
- aishell5
|
| 41 |
+
- alimeeting
|
| 42 |
+
- ami_ihm
|
| 43 |
+
- ami_sdm
|
| 44 |
+
- callhome
|
| 45 |
+
- msdwild
|
| 46 |
+
- voxconverse
|
| 47 |
+
hf_split: train
|
| 48 |
+
shuffle_seed: 42
|
| 49 |
+
shuffle_buffer_size: 4
|
| 50 |
+
prefetch_factor: 4
|
| 51 |
+
persistent_workers: true
|
| 52 |
+
prefetch_rows: 8
|
| 53 |
+
use_lhotse: false
|
| 54 |
+
use_bucketing: true
|
| 55 |
+
num_buckets: 10
|
| 56 |
+
bucket_duration_bins:
|
| 57 |
+
- 10
|
| 58 |
+
- 20
|
| 59 |
+
- 30
|
| 60 |
+
- 40
|
| 61 |
+
- 50
|
| 62 |
+
- 60
|
| 63 |
+
- 70
|
| 64 |
+
- 80
|
| 65 |
+
- 90
|
| 66 |
+
pin_memory: true
|
| 67 |
+
min_duration: 10
|
| 68 |
+
max_duration: 90
|
| 69 |
+
batch_duration: 400
|
| 70 |
+
quadratic_duration: 1200
|
| 71 |
+
bucket_buffer_size: 20000
|
| 72 |
+
window_stride: 0.04
|
| 73 |
+
subsampling_factor: 1
|
| 74 |
+
validation_ds:
|
| 75 |
+
manifest_filepath: null
|
| 76 |
+
is_tarred: false
|
| 77 |
+
tarred_audio_filepaths: null
|
| 78 |
+
sample_rate: ${model.sample_rate}
|
| 79 |
+
num_spks: ${model.max_num_of_spks}
|
| 80 |
+
session_len_sec: 45
|
| 81 |
+
shift_sec: 4
|
| 82 |
+
soft_label_thres: 0.5
|
| 83 |
+
soft_targets: false
|
| 84 |
+
labels: null
|
| 85 |
+
batch_size: ${batch_size}
|
| 86 |
+
shuffle: false
|
| 87 |
+
num_workers: ${num_workers}
|
| 88 |
+
validation_mode: true
|
| 89 |
+
use_hf_streaming: true
|
| 90 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 91 |
+
hf_configs:
|
| 92 |
+
- alm_benchmark
|
| 93 |
+
hf_split: train
|
| 94 |
+
shuffle_seed: 42
|
| 95 |
+
shuffle_buffer_size: 100
|
| 96 |
+
prefetch_factor: 4
|
| 97 |
+
persistent_workers: true
|
| 98 |
+
prefetch_rows: 4
|
| 99 |
+
use_lhotse: false
|
| 100 |
+
use_bucketing: false
|
| 101 |
+
drop_last: false
|
| 102 |
+
pin_memory: true
|
| 103 |
+
window_stride: 0.04
|
| 104 |
+
subsampling_factor: 1
|
| 105 |
+
test_ds:
|
| 106 |
+
manifest_filepath: null
|
| 107 |
+
is_tarred: false
|
| 108 |
+
tarred_audio_filepaths: null
|
| 109 |
+
sample_rate: ${model.sample_rate}
|
| 110 |
+
num_spks: ${model.max_num_of_spks}
|
| 111 |
+
session_len_sec: 90
|
| 112 |
+
soft_label_thres: 0.5
|
| 113 |
+
soft_targets: false
|
| 114 |
+
labels: null
|
| 115 |
+
batch_size: ${batch_size}
|
| 116 |
+
shuffle: false
|
| 117 |
+
seq_eval_mode: true
|
| 118 |
+
num_workers: ${num_workers}
|
| 119 |
+
validation_mode: true
|
| 120 |
+
use_lhotse: false
|
| 121 |
+
use_bucketing: false
|
| 122 |
+
drop_last: false
|
| 123 |
+
pin_memory: true
|
| 124 |
+
window_stride: 0.04
|
| 125 |
+
subsampling_factor: 1
|
| 126 |
+
sortformer_modules:
|
| 127 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 128 |
+
num_spks: ${model.max_num_of_spks}
|
| 129 |
+
dropout_rate: 0.1
|
| 130 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 131 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
encoder:
|
| 134 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 135 |
+
subsampling_factor: 1
|
| 136 |
+
transformer_encoder:
|
| 137 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 138 |
+
num_layers: 18
|
| 139 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 140 |
+
inner_size: 768
|
| 141 |
+
num_attention_heads: 8
|
| 142 |
+
attn_score_dropout: 0.5
|
| 143 |
+
attn_layer_dropout: 0.5
|
| 144 |
+
ffn_dropout: 0.5
|
| 145 |
+
hidden_act: relu
|
| 146 |
+
pre_ln: false
|
| 147 |
+
pre_ln_final_layer_norm: true
|
| 148 |
+
loss:
|
| 149 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 150 |
+
weight: null
|
| 151 |
+
reduction: mean
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
optim:
|
| 154 |
+
name: adamw
|
| 155 |
+
lr: ${model.lr}
|
| 156 |
+
betas:
|
| 157 |
+
- 0.9
|
| 158 |
+
- 0.98
|
| 159 |
+
weight_decay: 0.001
|
| 160 |
+
sched:
|
| 161 |
+
name: InverseSquareRootAnnealing
|
| 162 |
+
warmup_steps: 2500
|
| 163 |
+
warmup_ratio: null
|
| 164 |
+
min_lr: 1.0e-06
|
| 165 |
+
trainer:
|
| 166 |
+
devices: 8
|
| 167 |
+
accelerator: gpu
|
| 168 |
+
precision: bf16-mixed
|
| 169 |
+
max_epochs: -1
|
| 170 |
+
max_steps: 16000
|
| 171 |
+
num_nodes: 1
|
| 172 |
+
strategy: ddp_find_unused_parameters_true
|
| 173 |
+
accumulate_grad_batches: 1
|
| 174 |
+
deterministic: false
|
| 175 |
+
enable_checkpointing: false
|
| 176 |
+
logger: false
|
| 177 |
+
log_every_n_steps: 1
|
| 178 |
+
val_check_interval: 2000
|
| 179 |
+
num_sanity_val_steps: 0
|
| 180 |
+
exp_manager:
|
| 181 |
+
use_datetime_version: false
|
| 182 |
+
exp_dir: ./peav_sortformer_train
|
| 183 |
+
name: peav_sortformer_train
|
| 184 |
+
resume_if_exists: true
|
| 185 |
+
resume_from_checkpoint: null
|
| 186 |
+
resume_ignore_no_checkpoint: true
|
| 187 |
+
create_tensorboard_logger: true
|
| 188 |
+
create_checkpoint_callback: true
|
| 189 |
+
create_wandb_logger: false
|
| 190 |
+
checkpoint_callback_params:
|
| 191 |
+
monitor: val_der
|
| 192 |
+
mode: min
|
| 193 |
+
save_top_k: 5
|
| 194 |
+
every_n_train_steps: 2000
|
| 195 |
+
every_n_epochs: 0
|
| 196 |
+
wandb_logger_kwargs:
|
| 197 |
+
resume: true
|
| 198 |
+
name: null
|
| 199 |
+
project: null
|
| 200 |
+
|
| 201 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 202 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 203 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
|
| 204 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
|
| 205 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 206 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 207 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 208 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo W 2026-04-13 11:11:44 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 210 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
|
| 211 |
+
[NeMo I 2026-04-13 11:12:10 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
|
| 212 |
+
[NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
|
| 213 |
+
Parameter Group 0
|
| 214 |
+
amsgrad: False
|
| 215 |
+
betas: (0.9, 0.98)
|
| 216 |
+
capturable: False
|
| 217 |
+
decoupled_weight_decay: True
|
| 218 |
+
differentiable: False
|
| 219 |
+
eps: 1e-08
|
| 220 |
+
foreach: None
|
| 221 |
+
fused: None
|
| 222 |
+
lr: 2e-05
|
| 223 |
+
maximize: False
|
| 224 |
+
weight_decay: 0.001
|
| 225 |
+
)
|
| 226 |
+
[NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x72cf44392b40>"
|
| 227 |
+
will be used during training (effective maximum steps = 16000) -
|
| 228 |
+
Parameters :
|
| 229 |
+
(warmup_steps: 2500
|
| 230 |
+
warmup_ratio: null
|
| 231 |
+
min_lr: 1.0e-06
|
| 232 |
+
max_steps: 16000
|
| 233 |
+
)
|
| 234 |
+
[NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.111822.
|
| 235 |
+
[NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351958.
|
| 236 |
+
[NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395577.
|
| 237 |
+
[NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7330983.
|
| 238 |
+
[NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.4493845.
|
| 239 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.307287.
|
| 240 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771316.
|
| 241 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136576.
|
| 242 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.9420383.
|
| 243 |
+
[NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374958.
|
| 244 |
+
[NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543286.
|
| 245 |
+
[NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291193.
|
| 246 |
+
[NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.408541.
|
| 247 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822076.
|
| 248 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9140973.
|
peav_sortformer_train/nemo_log_globalrank-7_localrank-7.txt
ADDED
|
@@ -0,0 +1,248 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[NeMo I 2026-04-13 11:11:39 peav_sortformer_train:31] Hydra config: name: PEAVSortformerDiarizer
|
| 2 |
+
num_workers: 1
|
| 3 |
+
batch_size: 24
|
| 4 |
+
model:
|
| 5 |
+
sample_rate: 48000
|
| 6 |
+
pil_weight: 0.5
|
| 7 |
+
ats_weight: 0.5
|
| 8 |
+
max_num_of_spks: 4
|
| 9 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 10 |
+
peav_pretrained: true
|
| 11 |
+
peav_freeze_layers: 0
|
| 12 |
+
rttm_unit_10ms_frame_count: 4
|
| 13 |
+
der_collar: 0.25
|
| 14 |
+
der_ignore_overlap: true
|
| 15 |
+
model_defaults:
|
| 16 |
+
fc_d_model: 768
|
| 17 |
+
tf_d_model: 192
|
| 18 |
+
train_ds:
|
| 19 |
+
manifest_filepath: null
|
| 20 |
+
sample_rate: ${model.sample_rate}
|
| 21 |
+
num_spks: ${model.max_num_of_spks}
|
| 22 |
+
session_len_sec: 45
|
| 23 |
+
shift_sec: 4
|
| 24 |
+
soft_label_thres: 0.5
|
| 25 |
+
soft_targets: false
|
| 26 |
+
labels: null
|
| 27 |
+
batch_size: ${batch_size}
|
| 28 |
+
shuffle: true
|
| 29 |
+
num_workers: ${num_workers}
|
| 30 |
+
validation_mode: false
|
| 31 |
+
use_hf_streaming: true
|
| 32 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 33 |
+
hf_configs:
|
| 34 |
+
- CHiME6
|
| 35 |
+
- Dipco
|
| 36 |
+
- ICSI
|
| 37 |
+
- M3SD
|
| 38 |
+
- NOTSOFAR
|
| 39 |
+
- aishell4
|
| 40 |
+
- aishell5
|
| 41 |
+
- alimeeting
|
| 42 |
+
- ami_ihm
|
| 43 |
+
- ami_sdm
|
| 44 |
+
- callhome
|
| 45 |
+
- msdwild
|
| 46 |
+
- voxconverse
|
| 47 |
+
hf_split: train
|
| 48 |
+
shuffle_seed: 42
|
| 49 |
+
shuffle_buffer_size: 4
|
| 50 |
+
prefetch_factor: 4
|
| 51 |
+
persistent_workers: true
|
| 52 |
+
prefetch_rows: 8
|
| 53 |
+
use_lhotse: false
|
| 54 |
+
use_bucketing: true
|
| 55 |
+
num_buckets: 10
|
| 56 |
+
bucket_duration_bins:
|
| 57 |
+
- 10
|
| 58 |
+
- 20
|
| 59 |
+
- 30
|
| 60 |
+
- 40
|
| 61 |
+
- 50
|
| 62 |
+
- 60
|
| 63 |
+
- 70
|
| 64 |
+
- 80
|
| 65 |
+
- 90
|
| 66 |
+
pin_memory: true
|
| 67 |
+
min_duration: 10
|
| 68 |
+
max_duration: 90
|
| 69 |
+
batch_duration: 400
|
| 70 |
+
quadratic_duration: 1200
|
| 71 |
+
bucket_buffer_size: 20000
|
| 72 |
+
window_stride: 0.04
|
| 73 |
+
subsampling_factor: 1
|
| 74 |
+
validation_ds:
|
| 75 |
+
manifest_filepath: null
|
| 76 |
+
is_tarred: false
|
| 77 |
+
tarred_audio_filepaths: null
|
| 78 |
+
sample_rate: ${model.sample_rate}
|
| 79 |
+
num_spks: ${model.max_num_of_spks}
|
| 80 |
+
session_len_sec: 45
|
| 81 |
+
shift_sec: 4
|
| 82 |
+
soft_label_thres: 0.5
|
| 83 |
+
soft_targets: false
|
| 84 |
+
labels: null
|
| 85 |
+
batch_size: ${batch_size}
|
| 86 |
+
shuffle: false
|
| 87 |
+
num_workers: ${num_workers}
|
| 88 |
+
validation_mode: true
|
| 89 |
+
use_hf_streaming: true
|
| 90 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 91 |
+
hf_configs:
|
| 92 |
+
- alm_benchmark
|
| 93 |
+
hf_split: train
|
| 94 |
+
shuffle_seed: 42
|
| 95 |
+
shuffle_buffer_size: 100
|
| 96 |
+
prefetch_factor: 4
|
| 97 |
+
persistent_workers: true
|
| 98 |
+
prefetch_rows: 4
|
| 99 |
+
use_lhotse: false
|
| 100 |
+
use_bucketing: false
|
| 101 |
+
drop_last: false
|
| 102 |
+
pin_memory: true
|
| 103 |
+
window_stride: 0.04
|
| 104 |
+
subsampling_factor: 1
|
| 105 |
+
test_ds:
|
| 106 |
+
manifest_filepath: null
|
| 107 |
+
is_tarred: false
|
| 108 |
+
tarred_audio_filepaths: null
|
| 109 |
+
sample_rate: ${model.sample_rate}
|
| 110 |
+
num_spks: ${model.max_num_of_spks}
|
| 111 |
+
session_len_sec: 90
|
| 112 |
+
soft_label_thres: 0.5
|
| 113 |
+
soft_targets: false
|
| 114 |
+
labels: null
|
| 115 |
+
batch_size: ${batch_size}
|
| 116 |
+
shuffle: false
|
| 117 |
+
seq_eval_mode: true
|
| 118 |
+
num_workers: ${num_workers}
|
| 119 |
+
validation_mode: true
|
| 120 |
+
use_lhotse: false
|
| 121 |
+
use_bucketing: false
|
| 122 |
+
drop_last: false
|
| 123 |
+
pin_memory: true
|
| 124 |
+
window_stride: 0.04
|
| 125 |
+
subsampling_factor: 1
|
| 126 |
+
sortformer_modules:
|
| 127 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 128 |
+
num_spks: ${model.max_num_of_spks}
|
| 129 |
+
dropout_rate: 0.1
|
| 130 |
+
fc_d_model: ${model.model_defaults.fc_d_model}
|
| 131 |
+
tf_d_model: ${model.model_defaults.tf_d_model}
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
encoder:
|
| 134 |
+
d_model: ${model.model_defaults.fc_d_model}
|
| 135 |
+
subsampling_factor: 1
|
| 136 |
+
transformer_encoder:
|
| 137 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 138 |
+
num_layers: 18
|
| 139 |
+
hidden_size: ${model.model_defaults.tf_d_model}
|
| 140 |
+
inner_size: 768
|
| 141 |
+
num_attention_heads: 8
|
| 142 |
+
attn_score_dropout: 0.5
|
| 143 |
+
attn_layer_dropout: 0.5
|
| 144 |
+
ffn_dropout: 0.5
|
| 145 |
+
hidden_act: relu
|
| 146 |
+
pre_ln: false
|
| 147 |
+
pre_ln_final_layer_norm: true
|
| 148 |
+
loss:
|
| 149 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 150 |
+
weight: null
|
| 151 |
+
reduction: mean
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
optim:
|
| 154 |
+
name: adamw
|
| 155 |
+
lr: ${model.lr}
|
| 156 |
+
betas:
|
| 157 |
+
- 0.9
|
| 158 |
+
- 0.98
|
| 159 |
+
weight_decay: 0.001
|
| 160 |
+
sched:
|
| 161 |
+
name: InverseSquareRootAnnealing
|
| 162 |
+
warmup_steps: 2500
|
| 163 |
+
warmup_ratio: null
|
| 164 |
+
min_lr: 1.0e-06
|
| 165 |
+
trainer:
|
| 166 |
+
devices: 8
|
| 167 |
+
accelerator: gpu
|
| 168 |
+
precision: bf16-mixed
|
| 169 |
+
max_epochs: -1
|
| 170 |
+
max_steps: 16000
|
| 171 |
+
num_nodes: 1
|
| 172 |
+
strategy: ddp_find_unused_parameters_true
|
| 173 |
+
accumulate_grad_batches: 1
|
| 174 |
+
deterministic: false
|
| 175 |
+
enable_checkpointing: false
|
| 176 |
+
logger: false
|
| 177 |
+
log_every_n_steps: 1
|
| 178 |
+
val_check_interval: 2000
|
| 179 |
+
num_sanity_val_steps: 0
|
| 180 |
+
exp_manager:
|
| 181 |
+
use_datetime_version: false
|
| 182 |
+
exp_dir: ./peav_sortformer_train
|
| 183 |
+
name: peav_sortformer_train
|
| 184 |
+
resume_if_exists: true
|
| 185 |
+
resume_from_checkpoint: null
|
| 186 |
+
resume_ignore_no_checkpoint: true
|
| 187 |
+
create_tensorboard_logger: true
|
| 188 |
+
create_checkpoint_callback: true
|
| 189 |
+
create_wandb_logger: false
|
| 190 |
+
checkpoint_callback_params:
|
| 191 |
+
monitor: val_der
|
| 192 |
+
mode: min
|
| 193 |
+
save_top_k: 5
|
| 194 |
+
every_n_train_steps: 2000
|
| 195 |
+
every_n_epochs: 0
|
| 196 |
+
wandb_logger_kwargs:
|
| 197 |
+
resume: true
|
| 198 |
+
name: null
|
| 199 |
+
project: null
|
| 200 |
+
|
| 201 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1177] No version folders would be created under the log folder as 'resume_if_exists' is enabled.
|
| 202 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1022] There were no checkpoints found in checkpoint_dir or no checkpoint folder at checkpoint_dir :peav_sortformer_train/peav_sortformer_train/checkpoints. Training from scratch.
|
| 203 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:655] Experiments will be logged at peav_sortformer_train/peav_sortformer_train
|
| 204 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:1262] TensorboardLogger has been set up
|
| 205 |
+
[NeMo W 2026-04-13 11:11:39 exp_manager:1413] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to 16000. Please ensure that max_steps will run for at least 1 epochs to ensure that checkpointing will not error out.
|
| 206 |
+
[NeMo I 2026-04-13 11:11:39 exp_manager:804] TFLOPs per sec per GPU will be calculated, conditioned on supported models. Defaults to -1 upon failure.
|
| 207 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['CHiME6', 'Dipco', 'ICSI', 'M3SD', 'NOTSOFAR', 'aishell4', 'aishell5', 'alimeeting', 'ami_ihm', 'ami_sdm', 'callhome', 'msdwild', 'voxconverse'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 208 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:264] HF streaming dataloader: humanify/real_dia_dataset configs=['alm_benchmark'] split=train (shuffle_seed=42, rank_batch_size=24, world_size=8)
|
| 209 |
+
[NeMo W 2026-04-13 11:11:44 peav_sortformer_model:176] Could not load dataset as `manifest_filepath` was None. Provided config : {'manifest_filepath': None, 'is_tarred': False, 'tarred_audio_filepaths': None, 'sample_rate': 48000, 'num_spks': 4, 'session_len_sec': 90, 'soft_label_thres': 0.5, 'soft_targets': False, 'labels': None, 'batch_size': 24, 'shuffle': False, 'seq_eval_mode': True, 'num_workers': 1, 'validation_mode': True, 'use_lhotse': False, 'use_bucketing': False, 'drop_last': False, 'pin_memory': True, 'window_stride': 0.04, 'subsampling_factor': 1, 'num_speakers': 4}
|
| 210 |
+
[NeMo I 2026-04-13 11:11:44 peav_sortformer_model:65] Loading PEAV model: pe-av-small-16-frame, pretrained=True
|
| 211 |
+
[NeMo I 2026-04-13 11:12:08 peav_sortformer_model:82] PEAV encoder: 12 layers total, first 0 frozen, last 12 trainable
|
| 212 |
+
[NeMo I 2026-04-13 11:12:11 modelPT:830] Optimizer config = AdamW (
|
| 213 |
+
Parameter Group 0
|
| 214 |
+
amsgrad: False
|
| 215 |
+
betas: (0.9, 0.98)
|
| 216 |
+
capturable: False
|
| 217 |
+
decoupled_weight_decay: True
|
| 218 |
+
differentiable: False
|
| 219 |
+
eps: 1e-08
|
| 220 |
+
foreach: None
|
| 221 |
+
fused: None
|
| 222 |
+
lr: 2e-05
|
| 223 |
+
maximize: False
|
| 224 |
+
weight_decay: 0.001
|
| 225 |
+
)
|
| 226 |
+
[NeMo I 2026-04-13 11:12:11 lr_scheduler:995] Scheduler "<nemo.core.optim.lr_scheduler.InverseSquareRootAnnealing object at 0x78cb16b8e900>"
|
| 227 |
+
will be used during training (effective maximum steps = 16000) -
|
| 228 |
+
Parameters :
|
| 229 |
+
(warmup_steps: 2500
|
| 230 |
+
warmup_ratio: null
|
| 231 |
+
min_lr: 1.0e-06
|
| 232 |
+
max_steps: 16000
|
| 233 |
+
)
|
| 234 |
+
[NeMo I 2026-04-13 13:14:33 nemo_model_checkpoint:573] Checkpoint save for step 2000 started at 1776086073.1118307.
|
| 235 |
+
[NeMo I 2026-04-13 15:17:20 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093440.5351996.
|
| 236 |
+
[NeMo I 2026-04-13 15:17:21 nemo_model_checkpoint:573] Checkpoint save for step 4000 started at 1776093441.2395585.
|
| 237 |
+
[NeMo I 2026-04-13 17:20:10 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100810.7331011.
|
| 238 |
+
[NeMo I 2026-04-13 17:20:11 nemo_model_checkpoint:573] Checkpoint save for step 6000 started at 1776100811.449385.
|
| 239 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.3072908.
|
| 240 |
+
[NeMo I 2026-04-13 19:24:08 nemo_model_checkpoint:573] Checkpoint save for step 8000 started at 1776108248.9771328.
|
| 241 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.2136598.
|
| 242 |
+
[NeMo I 2026-04-13 21:26:58 nemo_model_checkpoint:573] Checkpoint save for step 10000 started at 1776115618.942039.
|
| 243 |
+
[NeMo I 2026-04-13 23:29:47 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122987.6374946.
|
| 244 |
+
[NeMo I 2026-04-13 23:29:48 nemo_model_checkpoint:573] Checkpoint save for step 12000 started at 1776122988.3543284.
|
| 245 |
+
[NeMo I 2026-04-14 01:35:29 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130529.7291188.
|
| 246 |
+
[NeMo I 2026-04-14 01:35:30 nemo_model_checkpoint:573] Checkpoint save for step 14000 started at 1776130530.4085422.
|
| 247 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.1822095.
|
| 248 |
+
[NeMo I 2026-04-14 03:39:20 nemo_model_checkpoint:573] Checkpoint save for step 16000 started at 1776137960.9141161.
|
peav_sortformer_train/version_0/events.out.tfevents.1776078731.260d0e88e8f5.93828.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00b94e3a28aa5883807f2d77d3b01f5e75bf50b51dac7fb16aebfa15e046686b
|
| 3 |
+
size 9138653
|
peav_sortformer_train/version_0/hparams.yaml
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cfg:
|
| 2 |
+
sample_rate: 48000
|
| 3 |
+
pil_weight: 0.5
|
| 4 |
+
ats_weight: 0.5
|
| 5 |
+
max_num_of_spks: 4
|
| 6 |
+
peav_checkpoint: pe-av-small-16-frame
|
| 7 |
+
peav_pretrained: true
|
| 8 |
+
peav_freeze_layers: 0
|
| 9 |
+
rttm_unit_10ms_frame_count: 4
|
| 10 |
+
der_collar: 0.25
|
| 11 |
+
der_ignore_overlap: true
|
| 12 |
+
model_defaults:
|
| 13 |
+
fc_d_model: 768
|
| 14 |
+
tf_d_model: 192
|
| 15 |
+
train_ds:
|
| 16 |
+
manifest_filepath: null
|
| 17 |
+
sample_rate: 48000
|
| 18 |
+
num_spks: 4
|
| 19 |
+
session_len_sec: 45
|
| 20 |
+
shift_sec: 4
|
| 21 |
+
soft_label_thres: 0.5
|
| 22 |
+
soft_targets: false
|
| 23 |
+
labels: null
|
| 24 |
+
batch_size: 24
|
| 25 |
+
shuffle: true
|
| 26 |
+
num_workers: 1
|
| 27 |
+
validation_mode: false
|
| 28 |
+
use_hf_streaming: true
|
| 29 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 30 |
+
hf_configs:
|
| 31 |
+
- CHiME6
|
| 32 |
+
- Dipco
|
| 33 |
+
- ICSI
|
| 34 |
+
- M3SD
|
| 35 |
+
- NOTSOFAR
|
| 36 |
+
- aishell4
|
| 37 |
+
- aishell5
|
| 38 |
+
- alimeeting
|
| 39 |
+
- ami_ihm
|
| 40 |
+
- ami_sdm
|
| 41 |
+
- callhome
|
| 42 |
+
- msdwild
|
| 43 |
+
- voxconverse
|
| 44 |
+
hf_split: train
|
| 45 |
+
shuffle_seed: 42
|
| 46 |
+
shuffle_buffer_size: 4
|
| 47 |
+
prefetch_factor: 4
|
| 48 |
+
persistent_workers: true
|
| 49 |
+
prefetch_rows: 8
|
| 50 |
+
use_lhotse: false
|
| 51 |
+
use_bucketing: true
|
| 52 |
+
num_buckets: 10
|
| 53 |
+
bucket_duration_bins:
|
| 54 |
+
- 10
|
| 55 |
+
- 20
|
| 56 |
+
- 30
|
| 57 |
+
- 40
|
| 58 |
+
- 50
|
| 59 |
+
- 60
|
| 60 |
+
- 70
|
| 61 |
+
- 80
|
| 62 |
+
- 90
|
| 63 |
+
pin_memory: true
|
| 64 |
+
min_duration: 10
|
| 65 |
+
max_duration: 90
|
| 66 |
+
batch_duration: 400
|
| 67 |
+
quadratic_duration: 1200
|
| 68 |
+
bucket_buffer_size: 20000
|
| 69 |
+
window_stride: 0.04
|
| 70 |
+
subsampling_factor: 1
|
| 71 |
+
validation_ds:
|
| 72 |
+
manifest_filepath: null
|
| 73 |
+
is_tarred: false
|
| 74 |
+
tarred_audio_filepaths: null
|
| 75 |
+
sample_rate: 48000
|
| 76 |
+
num_spks: 4
|
| 77 |
+
session_len_sec: 45
|
| 78 |
+
shift_sec: 4
|
| 79 |
+
soft_label_thres: 0.5
|
| 80 |
+
soft_targets: false
|
| 81 |
+
labels: null
|
| 82 |
+
batch_size: 24
|
| 83 |
+
shuffle: false
|
| 84 |
+
num_workers: 1
|
| 85 |
+
validation_mode: true
|
| 86 |
+
use_hf_streaming: true
|
| 87 |
+
hf_dataset_path: humanify/real_dia_dataset
|
| 88 |
+
hf_configs:
|
| 89 |
+
- alm_benchmark
|
| 90 |
+
hf_split: train
|
| 91 |
+
shuffle_seed: 42
|
| 92 |
+
shuffle_buffer_size: 100
|
| 93 |
+
prefetch_factor: 4
|
| 94 |
+
persistent_workers: true
|
| 95 |
+
prefetch_rows: 4
|
| 96 |
+
use_lhotse: false
|
| 97 |
+
use_bucketing: false
|
| 98 |
+
drop_last: false
|
| 99 |
+
pin_memory: true
|
| 100 |
+
window_stride: 0.04
|
| 101 |
+
subsampling_factor: 1
|
| 102 |
+
test_ds:
|
| 103 |
+
manifest_filepath: null
|
| 104 |
+
is_tarred: false
|
| 105 |
+
tarred_audio_filepaths: null
|
| 106 |
+
sample_rate: 48000
|
| 107 |
+
num_spks: 4
|
| 108 |
+
session_len_sec: 90
|
| 109 |
+
soft_label_thres: 0.5
|
| 110 |
+
soft_targets: false
|
| 111 |
+
labels: null
|
| 112 |
+
batch_size: 24
|
| 113 |
+
shuffle: false
|
| 114 |
+
seq_eval_mode: true
|
| 115 |
+
num_workers: 1
|
| 116 |
+
validation_mode: true
|
| 117 |
+
use_lhotse: false
|
| 118 |
+
use_bucketing: false
|
| 119 |
+
drop_last: false
|
| 120 |
+
pin_memory: true
|
| 121 |
+
window_stride: 0.04
|
| 122 |
+
subsampling_factor: 1
|
| 123 |
+
sortformer_modules:
|
| 124 |
+
_target_: nemo.collections.asr.modules.sortformer_modules.SortformerModules
|
| 125 |
+
num_spks: 4
|
| 126 |
+
dropout_rate: 0.1
|
| 127 |
+
fc_d_model: 768
|
| 128 |
+
tf_d_model: 192
|
| 129 |
+
subsampling_factor: 1
|
| 130 |
+
encoder:
|
| 131 |
+
d_model: 768
|
| 132 |
+
subsampling_factor: 1
|
| 133 |
+
transformer_encoder:
|
| 134 |
+
_target_: nemo.collections.asr.modules.transformer.transformer_encoders.TransformerEncoder
|
| 135 |
+
num_layers: 18
|
| 136 |
+
hidden_size: 192
|
| 137 |
+
inner_size: 768
|
| 138 |
+
num_attention_heads: 8
|
| 139 |
+
attn_score_dropout: 0.5
|
| 140 |
+
attn_layer_dropout: 0.5
|
| 141 |
+
ffn_dropout: 0.5
|
| 142 |
+
hidden_act: relu
|
| 143 |
+
pre_ln: false
|
| 144 |
+
pre_ln_final_layer_norm: true
|
| 145 |
+
loss:
|
| 146 |
+
_target_: nemo.collections.asr.losses.bce_loss.BCELoss
|
| 147 |
+
weight: null
|
| 148 |
+
reduction: mean
|
| 149 |
+
lr: 2.0e-05
|
| 150 |
+
optim:
|
| 151 |
+
name: adamw
|
| 152 |
+
lr: 2.0e-05
|
| 153 |
+
betas:
|
| 154 |
+
- 0.9
|
| 155 |
+
- 0.98
|
| 156 |
+
weight_decay: 0.001
|
| 157 |
+
sched:
|
| 158 |
+
name: InverseSquareRootAnnealing
|
| 159 |
+
warmup_steps: 2500
|
| 160 |
+
warmup_ratio: null
|
| 161 |
+
min_lr: 1.0e-06
|