{ "decoder_config": { "image_dim": 4, "patch_size": 2, "decoder_style": "dit", "num_layers": 28, "hidden_size": 1152, "intermediate_size": 3456, "num_heads": 16, "decode_mode": "diffusion" }, "encoder_hidden_size": 768, "latent_h": 32, "latent_w": 32, "latent_mean": [ -0.69, -0.48, -0.6, 0.28 ], "latent_std": [ 12.38, 11.22, 7.93, 21.22 ], "pretrain_mode": "video", "source_ckpt": "ttvidt-dit-pretrain/2v3xqhf2/checkpoints/epoch=11-step=80000.ckpt" }