{
  "decoder_config": {
    "image_dim": 4,
    "patch_size": 2,
    "decoder_style": "dit",
    "num_layers": 12,
    "hidden_size": 768,
    "intermediate_size": 3072,
    "num_heads": 12,
    "decode_mode": "diffusion",
    "qk_norm": true,
    "attn_bias": false,
    "use_final_norm": true
  },
  "encoder_hidden_size": 768,
  "latent_h": 32,
  "latent_w": 32,
  "latent_mean": [
    -0.69,
    -0.48,
    -0.6,
    0.28
  ],
  "latent_std": [
    12.38,
    11.22,
    7.93,
    21.22
  ],
  "pretrain_mode": "imagenet",
  "source_ckpt": "ttvidt-dit-pretrain/tsiyjsvh/checkpoints/epoch=19-step=100000.ckpt"
}