HaoyiZhu committed on
Commit
6f60830
·
verified ·
1 Parent(s): a1c1c1f

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +69 -0
config.yaml ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Model section.
# NOTE(review): the diff view flattened all indentation; every key below is
# assumed to nest under `model:` (the inner `model:` key holds the model
# name) — confirm against the consumer's schema.
model:
  model: SanaMSVideoCamCtrl_1600M_P1_D20
  image_size: 720
  aspect_ratio_type: ASPECT_RATIO_VIDEO_720_MS_DIV32
  mixed_precision: bf16
  fp32_attention: true
  multi_scale: true
  camctrl_type: BidirectionalGDNUCPESinglePathLiteLABothTriton
  attn_type: BidirectionalGDNTriton
  softmax_every_n: 4
  linear_head_dim: 112
  conv_kernel_size: 4
  k_conv_only: true
  ffn_type: GLUMBConvTemp
  t_kernel_size: 3
  mlp_acts:
    - silu
    - silu
    # The third entry had no value in the original (a bare `-`, which YAML
    # loads as null); spelled out explicitly so it is not mistaken for a typo.
    - null
  mlp_ratio: 3
  use_pe: true
  pos_embed_type: wan_rope
  qk_norm: true
  cross_norm: true
  class_dropout_prob: 0.0
  chunk_split_strategy: first_chunk_plus_one
  cam_attn_compress: 1
  init_cam_from_base: true
  use_chunk_plucker_post_attn: true
  chunk_plucker_channels: 48
  chunk_plucker_post_attn_blocks: 20

# VAE section.
# NOTE(review): indentation was lost in the diff view; all keys below are
# assumed to nest under `vae:` — confirm against the consumer's schema.
vae:
  vae_type: LTX2VAE_diffusers
  # AutoencoderKLLTX2Video.from_pretrained(<root>, subfolder="vae") expects
  # a parent directory containing a ``vae/`` diffusers folder. The public
  # release repo hosts that ``vae/`` folder at its root.
  vae_pretrained: hf://Efficient-Large-Model/SANA-WM_bidirectional
  weight_dtype: bfloat16
  vae_latent_dim: 128
  vae_downsample_rate: 32
  vae_stride: [8, 32, 32]
  use_framewise_encoding: true
  use_framewise_decoding: true
  tile_sample_stride_num_frames: 64
  tile_sample_min_num_frames: 96

# Text-encoder section.
# NOTE(review): indentation was lost in the diff view; all keys below are
# assumed to nest under `text_encoder:`, and the quoted strings are assumed
# to be the items of `chi_prompt` — confirm against the consumer's schema.
text_encoder:
  text_encoder_name: gemma-2-2b-it
  y_norm: true
  y_norm_scale_factor: 0.01
  model_max_length: 300
  # Prompt-enhancement instruction lines. The strings are kept byte-for-byte;
  # the last item ends with a trailing space, so it must stay quoted.
  chi_prompt:
    - 'Given a user prompt, generate an "Enhanced prompt" that provides detailed visual descriptions suitable for image generation. Evaluate the level of detail in the user prompt:'
    - '- If the prompt is simple, focus on adding specifics about colors, shapes, sizes, textures, and spatial relationships to create vivid and concrete scenes.'
    - '- If the prompt is already detailed, refine and enhance the existing details slightly without overcomplicating.'
    - 'Here are examples of how to transform or refine prompts:'
    - '- User Prompt: A cat sleeping -> Enhanced: A small, fluffy white cat curled up in a round shape, sleeping peacefully on a warm sunny windowsill, surrounded by pots of blooming red flowers.'
    - '- User Prompt: A busy city street -> Enhanced: A bustling city street scene at dusk, featuring glowing street lamps, a diverse crowd of people in colorful clothing, and a double-decker bus passing by towering glass skyscrapers.'
    - 'Please generate only the enhanced description for the prompt below and avoid including any additional commentary or evaluations:'
    - 'User Prompt: '

# Scheduler section.
# NOTE(review): indentation was lost in the diff view; all keys below are
# assumed to nest under `scheduler:` — confirm against the consumer's schema.
scheduler:
  predict_flow_v: true
  noise_schedule: linear_flow
  pred_sigma: false
  # NOTE(review): `flow_shift` and `inference_flow_shift` differ (9.95 vs 9.8);
  # presumably intentional — confirm.
  flow_shift: 9.95
  inference_flow_shift: 9.8
  vis_sampler: flow_dpm-solver