Text-to-Video
Diffusers
Safetensors
English
efficient
mobile video generation
dit
pyramidal diffusion
karnewar's picture
Rest of the model
b7b8a47 verified
raw
history blame contribute delete
657 Bytes
{
"_class_name": "PyramidDiffusionMMDiT",
"_diffusers_version": "0.34.0.dev0",
"add_temp_pos_embed": true,
"attention_head_dim": 64,
"caption_projection_dim": 1536,
"gradient_checkpointing_ratio": 0.6,
"in_channels": 16,
"interp_condition_pos": true,
"joint_attention_dim": 4096,
"max_num_frames": 200,
"num_attention_heads": 24,
"num_layers": 18,
"patch_size": 2,
"pooled_projection_dim": 2048,
"pos_embed_max_size": 192,
"pos_embed_type": "sincos",
"qk_norm": "rms_norm",
"sample_size": 128,
"temp_pos_embed_type": "rope",
"use_gradient_checkpointing": false,
"use_t5_mask": true,
"use_temporal_causal": true
}