# Viewer metadata (not part of the config): file size 1,558 Bytes; 7557af0
---
# Task identifier for this configuration (presumably selects the dataset
# pipeline; verify against the code that reads it).
task_name: audioset
# Input data settings: index file, sampler, audio framing and augmentation.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been stripped. Confirm against the code that reads
# this config, in particular that `augmentation` lives under `data`.
data:
  indexes_dict: "hdf5s/indexes/full_train.h5"
  sampler_type: balanced_sampler
  anchor_segment_detect_mode: max_area  # "max_area" | "random"
  sample_rate: 32000
  frames_per_second: 100
  segment_seconds: 2.0
  classes_num: 527
  augmentation:
    match_energy: true
    mix_num: 2
# Sound event detection model selection.
# Keys are nested under this section so that `model_type` does not collide
# with the `model_type` keys of the other model sections.
sound_event_detection:
  model_type: Cnn14_DecisionLevelMax
  freeze: true
# Query network settings.
# Keys are nested under this section so that `model_type` does not collide
# with the `model_type` keys of the other model sections.
query_net:
  model_type: Cnn14_Wrapper  # "Cnn14_Wrapper" | "AdaptiveCnn14_Wrapper"
  base_checkpoint_type: "Cnn14"
  freeze_base: true
  freeze_adaptor: false
  bottleneck_type: at_soft  # "embedding" | "at_soft"
  # NOTE(review): same value as `classes_num` (527) in the data settings;
  # presumably the two must stay in sync. Confirm.
  outputs_num: 527
# Separation model selection and channel counts.
# Keys are nested under this section so that `model_type` does not collide
# with the `model_type` keys of the other model sections.
ss_model:
  model_type: ResUNet30
  input_channels: 1
  output_channels: 1
# Training settings: data loading, loss, optimizer, schedule and checkpoints.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been stripped. The learning-rate keys are placed
# under `optimizer` on the assumption that they belong with
# `optimizer_type`; confirm against the training script.
train:
  num_workers: 16
  loss_type: l1_wav
  optimizer:
    optimizer_type: AdamW
    # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
    # resolve a bare `1e-3` as a string rather than a float.
    learning_rate: 1.0e-3
    lr_lambda_type: constant_warm_up  # "constant_warm_up" | "linear_warm_up"
    warm_up_steps: 10000
    reduce_lr_steps: 1000000
  batch_size_per_device: 16
  precision: 32
  steps_per_epoch: 10000  # Every 10000 steps is called an epoch
  evaluate_step_frequency: 20000  # Evaluate every #evaluate_step_frequency steps
  save_step_frequency: 100000  # Save every #save_step_frequency steps
  early_stop_steps: 10000001
  random_seed: 1234
  resume_checkpoint_path: ""
# Evaluation settings: directories of evaluation segments and a per-class cap.
# The keys are nested under `evaluate`; at column 0 they would parse as
# top-level keys and leave `evaluate` itself null.
evaluate:
  balanced_train_eval_dir: "evaluation/audioset/2s_segments_balanced_train"
  test_eval_dir: "evaluation/audioset/2s_segments_test"
  max_eval_per_class: 10
|