miae-b-seq-ft / config.json
dexiongc's picture
Upload folder using huggingface_hub
e364dcc verified
{
"seed": 42,
"debug": false,
"wandb": true,
"pretrained_model_path": "pretrained/new/mae_base_mask0.9_dec512d2b_seq.ckpt",
"datamodule": {
"batch_size": 64,
"pin_memory": true,
"num_workers": 12,
"train_transform": {
"_target_": "tedbench.data.transform.Compose",
"transforms": [
{
"_target_": "tedbench.data.transform.RandomCrop",
"size": 512
},
{
"_target_": "tedbench.data.transform.RandomNoise",
"std": 0.2,
"mean": 0.0
}
]
},
"_target_": "tedbench.data.TEDLightningDataset",
"root": "./datasets/ted",
"dataset_name": "ted"
},
"trainer": {
"_target_": "pytorch_lightning.Trainer",
"accelerator": "auto",
"max_steps": 18300,
"strategy": "auto",
"devices": "auto",
"default_root_dir": "${logs.path}",
"num_sanity_val_steps": 0,
"accumulate_grad_batches": 2
},
"train": {
"optimizer": {
"_target_": "torch.optim.AdamW",
"lr": 0.0016,
"weight_decay": 0.1,
"betas": [
0.9,
0.95
]
},
"lr_scheduler": {
"_target_": "tedbench.lr_schedulers.get_cosine_schedule_with_warmup",
"warmup_steps": 1830,
"max_steps": "${trainer.max_steps}"
},
"loss": {
"_target_": "torch.nn.CrossEntropyLoss",
"label_smoothing": 0.0
},
"llrd": 0.8,
"ckpt_path": null
},
"model": {
"_target_": "tedbench.model.miae_encoder_model",
"name": "miae_b",
"num_classes": 965,
"avg_pool": false,
"use_seq_input": true
},
"logs": {
"prefix": "logs/finetune/${datamodule.dataset_name}/${seed}",
"path": "${logs.prefix}/runs/${now:%Y-%m-%d}_${now:%H-%M-%S}"
},
"mode": {},
"_model_class": "miae_classifier"
}