Text-to-Video
Diffusers
Safetensors
English
efficient
mobile video generation
dit
pyramidal diffusion
File size: 657 Bytes
b7b8a47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
{
  "_class_name": "PyramidDiffusionMMDiT",
  "_diffusers_version": "0.34.0.dev0",
  "add_temp_pos_embed": true,
  "attention_head_dim": 64,
  "caption_projection_dim": 1536,
  "gradient_checkpointing_ratio": 0.6,
  "in_channels": 16,
  "interp_condition_pos": true,
  "joint_attention_dim": 4096,
  "max_num_frames": 200,
  "num_attention_heads": 24,
  "num_layers": 18,
  "patch_size": 2,
  "pooled_projection_dim": 2048,
  "pos_embed_max_size": 192,
  "pos_embed_type": "sincos",
  "qk_norm": "rms_norm",
  "sample_size": 128,
  "temp_pos_embed_type": "rope",
  "use_gradient_checkpointing": false,
  "use_t5_mask": true,
  "use_temporal_causal": true
}