| datamodule: |
| _target_: look2hear.datas.datamodule.DataModule |
| batch_size: 1 |
| num_workers: 8 |
| pin_memory: true |
| DataClass: |
| _target_: look2hear.datas.datasets.waveform.WaveformDataClass |
| train_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tr |
| val_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/cv |
| test_json_dir: /home/likai/ssd/Look2hear/examples/CTCNet/LRS2/tt |
| n_src: 1 |
| task: enhancement |
| sample_rate: 16000 |
| segment: 4 |
| is_drop: false |
| normalize_audio: false |
| augmentation: false |
| audiomodel: |
| _target_: look2hear.models.ctcnet.CTCNet |
| encoder_type: ConvolutionalEncoder |
| decoder_type: ConvolutionalDecoder |
| audio_channels: 1 |
| audio_encoder_channels: 512 |
| audio_encoder_kernels: 21 |
| audio_encoder_strides: 10 |
| audio_in_channels: 512 |
| audio_out_channels: 512 |
| audio_kernel_size: 5 |
| audio_depth: 4 |
| audio_block_type: ConvNormAct |
| audio_norm_type: gLN |
| audio_act_type: PReLU |
| audio_shared: true |
| visual_encoder_channels: 512 |
| visual_in_channels: 64 |
| visual_out_channels: 64 |
| visual_kernel_size: 3 |
| visual_depth: 4 |
| visual_block_type: ConvNormAct |
| visual_norm_type: BatchNorm1d |
| visual_act_type: PReLU |
| visual_shared: false |
| fusion_type: ConcatFusion |
| fusion_shared: false |
| n_repeats: 3 |
| m_repeats: 13 |
| mask_types: MaskGenerator |
| num_speakers: 1 |
| mask_kernel_size: 1 |
| mask_act: ReLU |
| mask_RI_split: false |
| mask_output_gate: false |
| mask_dw_gate: false |
| mask_direct: false |
| mask_is2d: false |
| videomodel: |
| _target_: look2hear.video_models.resnetmodel.ResNetVideoModel |
| activation_type: PReLU |
| pretrained: /home/likai/ssd/Look2hear/pretrain_zoo/frcnn_128_512.backbone.pth.tar |
| audio_optimizer: |
| _target_: torch.optim.AdamW |
| lr: 0.001 |
| weight_decay: 0.1 |
| audio_scheduler: |
| _target_: torch.optim.lr_scheduler.ReduceLROnPlateau |
| mode: min |
| factor: 0.5 |
| patience: 10 |
| audio_loss: |
| _target_: look2hear.losses.pitwrapper.PITLossWrapper |
| loss_func: look2hear.losses.snr.neg_sisdr |
| pit: true |
| mode: permutation-wise |
| eval_func: min |
| system: |
| _target_: look2hear.systems.single_speaker.SingleSpeaker |
| freeze_video_model: true |
| compile: false |
| exp: |
| dir: /home/likai/ssd/Look2hear/examples/CTCNet |
| name: CTCNet-1 |
| checkpoint: |
| _target_: lightning.pytorch.callbacks.ModelCheckpoint |
| dirpath: ${exp.dir}/${exp.name}/checkpoints |
| monitor: val/neg_sisdr |
| mode: min |
| verbose: true |
| save_top_k: 1 |
| save_last: true |
| filename: '{epoch}-{val/neg_sisdr:.4f}' |
| logger: |
| _target_: lightning.pytorch.loggers.WandbLogger |
| name: ${exp.name} |
| save_dir: ${exp.dir}/${exp.name}/logs |
| offline: true |
| project: Look2hear |
| trainer: |
| _target_: lightning.pytorch.Trainer |
| devices: |
| - 0 |
| max_epochs: 5 |
| sync_batchnorm: true |
| gradient_clip_val: 5.0 |
| default_root_dir: ${exp.dir}/${exp.name}/ |
| accelerator: cuda |
| limit_train_batches: 0.001 |
| limit_val_batches: 0.001 |
| fast_dev_run: false |
| precision: bf16-mixed |
|
|