| { |
| "embed_tokens.weight": { |
| "shape": [ |
| 28385280 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 28385280 |
| }, |
| "layers.0.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.0.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.0.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.0.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.0.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.0.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.0.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.1.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.1.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.1.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.1.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.1.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.1.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.1.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.2.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.2.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.2.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.2.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.2.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.2.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.2.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.3.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.3.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.3.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.3.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.3.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.3.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.3.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.4.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.4.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.4.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.4.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.4.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.4.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.4.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.5.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.5.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.5.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.5.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.5.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.5.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.5.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.6.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.6.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.6.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.6.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.6.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.6.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.6.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.7.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.7.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.7.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.7.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.7.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.7.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.7.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.8.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.8.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.8.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.8.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.8.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.8.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.8.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.9.self_attn.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.9.self_attn.k_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.9.self_attn.v_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.9.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.9.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.9.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.9.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.10.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.10.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.10.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.10.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.10.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.10.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.10.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.10.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.11.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.11.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.11.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.11.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.11.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.11.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.11.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.11.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.12.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.12.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.12.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.12.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.12.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.12.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.12.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.12.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.13.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.13.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.13.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.13.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.13.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.13.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.13.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.13.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.14.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.14.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.14.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.14.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.14.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.14.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.14.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.14.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.15.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.15.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.15.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.15.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.15.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.15.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.15.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.15.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.16.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.16.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.16.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.16.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.16.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.16.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.16.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.16.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.17.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.17.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.17.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.17.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.17.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.17.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.17.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.17.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.18.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.18.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.18.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.18.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.18.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.18.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.18.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.18.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.19.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.19.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.19.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.19.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.19.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.19.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.19.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.19.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.20.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.20.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.20.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.20.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.20.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.20.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.20.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.20.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.21.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.21.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.21.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.21.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.21.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.21.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.21.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.21.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.22.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.22.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.22.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.22.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.22.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.22.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.22.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.22.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.23.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.23.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.23.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.23.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.23.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.23.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.23.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.23.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.24.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.24.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.24.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.24.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.24.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.24.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.24.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.24.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.25.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.25.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.25.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.25.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.25.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.25.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.25.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.25.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.26.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.26.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.26.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.26.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.26.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.26.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.26.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.26.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.27.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.27.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.27.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.27.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.27.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.27.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.27.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.27.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.28.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.28.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.28.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.28.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.28.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.28.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.28.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.28.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.29.self_attn.q_a_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.29.self_attn.q_b_proj.weight": { |
| "shape": [ |
| 165888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 165888 |
| }, |
| "layers.29.self_attn.kv_a_proj_with_mqa.weight": { |
| "shape": [ |
| 92160 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 92160 |
| }, |
| "layers.29.self_attn.kv_b_proj.weight": { |
| "shape": [ |
| 110592 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 110592 |
| }, |
| "layers.29.self_attn.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "layers.29.gate_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.29.up_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "layers.29.down_proj.weight": { |
| "shape": [ |
| 884736 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 884736 |
| }, |
| "lm_head.weight": { |
| "shape": [ |
| 28385280 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 28385280 |
| }, |
| "flow_head.patch_embed.weight": { |
| "shape": [ |
| 9216 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 9216 |
| }, |
| "flow_head.time_embed.0.weight": { |
| "shape": [ |
| 147456 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 147456 |
| }, |
| "flow_head.time_embed.2.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.adaln.adaLN_modulation.1.weight": { |
| "shape": [ |
| 1990656 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1990656 |
| }, |
| "flow_head.layers.0.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.cross_q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.cross_k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.cross_v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.cross_o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.0.mlp.0.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.0.mlp.2.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.1.adaln.adaLN_modulation.1.weight": { |
| "shape": [ |
| 1990656 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1990656 |
| }, |
| "flow_head.layers.1.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.1.k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.1.v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.1.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.1.cross_q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.1.cross_k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.1.cross_v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.1.cross_o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.1.mlp.0.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.1.mlp.2.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.2.adaln.adaLN_modulation.1.weight": { |
| "shape": [ |
| 1990656 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1990656 |
| }, |
| "flow_head.layers.2.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.2.k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.2.v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.2.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.2.cross_q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.2.cross_k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.2.cross_v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.2.cross_o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.2.mlp.0.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.2.mlp.2.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.3.adaln.adaLN_modulation.1.weight": { |
| "shape": [ |
| 1990656 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1990656 |
| }, |
| "flow_head.layers.3.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.3.k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.3.v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.3.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.3.cross_q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.3.cross_k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.3.cross_v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.3.cross_o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.3.mlp.0.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.3.mlp.2.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.4.adaln.adaLN_modulation.1.weight": { |
| "shape": [ |
| 1990656 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1990656 |
| }, |
| "flow_head.layers.4.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.4.k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.4.v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.4.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.4.cross_q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.4.cross_k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.4.cross_v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.4.cross_o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.4.mlp.0.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.4.mlp.2.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.5.adaln.adaLN_modulation.1.weight": { |
| "shape": [ |
| 1990656 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1990656 |
| }, |
| "flow_head.layers.5.q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.5.k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.5.v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.5.o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.5.cross_q_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.5.cross_k_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.5.cross_v_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.5.cross_o_proj.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "flow_head.layers.5.mlp.0.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.layers.5.mlp.2.weight": { |
| "shape": [ |
| 1327104 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 1327104 |
| }, |
| "flow_head.final_adaLN.1.weight": { |
| "shape": [ |
| 663552 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 663552 |
| }, |
| "flow_head.final_proj.weight": { |
| "shape": [ |
| 9216 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 9216 |
| }, |
| "gen_image_encoder.proj.0.weight": { |
| "shape": [ |
| 9216 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 9216 |
| }, |
| "gen_image_encoder.proj.2.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| }, |
| "vision_projector.0.weight": { |
| "shape": [ |
| 7077888 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 7077888 |
| }, |
| "vision_projector.2.weight": { |
| "shape": [ |
| 331776 |
| ], |
| "block_size": 64, |
| "dtype": "nf4", |
| "n_elements": 331776 |
| } |
| } |