{
  "decoder_config": {
    "image_dim": 4,
    "patch_size": 2,
    "decoder_style": "dit",
    "num_layers": 12,
    "hidden_size": 768,
    "intermediate_size": 3072,
    "num_heads": 12,
    "decode_mode": "diffusion",
    "qk_norm": true,
    "attn_bias": false,
    "use_final_norm": false
  },
  "encoder_hidden_size": 768,
  "latent_h": 32,
  "latent_w": 32,
  "latent_mean": [-0.69, -0.48, -0.6, 0.28],
  "latent_std": [12.38, 11.22, 7.93, 21.22],
  "pretrain_mode": "video",
  "source_ckpt": "ttvidt-dit-pretrain/1c4ts6z2/checkpoints/epoch=14-step=100000.ckpt"
}