{
  "model_type": "void-transformer",
  "source": "netflix-void",
  "architecture": "CogVideoXTransformer3DModel",
  "passes": [
    "void_pass1",
    "void_pass2"
  ],
  "notes": {
    "patch_embed": "Linear layer with in_dim=384, i.e. in_channels=48 (16 latent + 16 VAE-mask + 16 VAE-video) * patch_volume=8.",
    "base_model": "Uses VAE and T5 from CogVideoX-Fun-V1.5-5b-InP."
  }
}