Tinman-SmolOmni-MLA-256M / quantization_metadata.json
TinmanLabSL's picture
NF4 quantized weights (3.8× compression, 109MB)
9a35913 verified
{
"embed_tokens.weight": {
"shape": [
28385280
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 28385280
},
"layers.0.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.0.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.0.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.0.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.0.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.0.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.0.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.1.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.1.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.1.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.1.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.1.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.1.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.1.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.2.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.2.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.2.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.2.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.2.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.2.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.2.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.3.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.3.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.3.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.3.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.3.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.3.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.3.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.4.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.4.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.4.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.4.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.4.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.4.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.4.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.5.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.5.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.5.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.5.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.5.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.5.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.5.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.6.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.6.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.6.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.6.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.6.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.6.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.6.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.7.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.7.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.7.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.7.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.7.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.7.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.7.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.8.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.8.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.8.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.8.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.8.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.8.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.8.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.9.self_attn.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.9.self_attn.k_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.9.self_attn.v_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.9.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.9.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.9.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.9.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.10.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.10.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.10.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.10.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.10.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.10.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.10.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.10.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.11.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.11.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.11.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.11.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.11.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.11.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.11.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.11.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.12.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.12.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.12.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.12.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.12.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.12.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.12.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.12.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.13.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.13.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.13.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.13.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.13.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.13.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.13.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.13.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.14.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.14.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.14.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.14.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.14.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.14.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.14.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.14.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.15.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.15.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.15.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.15.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.15.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.15.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.15.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.15.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.16.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.16.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.16.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.16.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.16.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.16.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.16.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.16.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.17.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.17.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.17.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.17.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.17.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.17.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.17.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.17.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.18.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.18.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.18.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.18.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.18.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.18.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.18.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.18.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.19.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.19.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.19.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.19.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.19.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.19.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.19.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.19.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.20.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.20.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.20.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.20.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.20.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.20.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.20.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.20.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.21.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.21.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.21.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.21.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.21.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.21.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.21.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.21.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.22.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.22.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.22.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.22.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.22.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.22.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.22.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.22.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.23.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.23.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.23.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.23.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.23.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.23.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.23.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.23.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.24.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.24.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.24.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.24.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.24.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.24.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.24.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.24.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.25.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.25.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.25.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.25.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.25.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.25.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.25.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.25.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.26.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.26.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.26.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.26.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.26.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.26.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.26.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.26.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.27.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.27.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.27.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.27.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.27.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.27.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.27.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.27.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.28.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.28.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.28.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.28.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.28.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.28.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.28.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.28.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.29.self_attn.q_a_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.29.self_attn.q_b_proj.weight": {
"shape": [
165888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 165888
},
"layers.29.self_attn.kv_a_proj_with_mqa.weight": {
"shape": [
92160
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 92160
},
"layers.29.self_attn.kv_b_proj.weight": {
"shape": [
110592
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 110592
},
"layers.29.self_attn.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"layers.29.gate_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.29.up_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"layers.29.down_proj.weight": {
"shape": [
884736
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 884736
},
"lm_head.weight": {
"shape": [
28385280
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 28385280
},
"flow_head.patch_embed.weight": {
"shape": [
9216
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 9216
},
"flow_head.time_embed.0.weight": {
"shape": [
147456
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 147456
},
"flow_head.time_embed.2.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.adaln.adaLN_modulation.1.weight": {
"shape": [
1990656
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1990656
},
"flow_head.layers.0.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.cross_q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.cross_k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.cross_v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.cross_o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.0.mlp.0.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.0.mlp.2.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.1.adaln.adaLN_modulation.1.weight": {
"shape": [
1990656
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1990656
},
"flow_head.layers.1.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.1.k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.1.v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.1.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.1.cross_q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.1.cross_k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.1.cross_v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.1.cross_o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.1.mlp.0.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.1.mlp.2.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.2.adaln.adaLN_modulation.1.weight": {
"shape": [
1990656
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1990656
},
"flow_head.layers.2.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.2.k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.2.v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.2.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.2.cross_q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.2.cross_k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.2.cross_v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.2.cross_o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.2.mlp.0.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.2.mlp.2.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.3.adaln.adaLN_modulation.1.weight": {
"shape": [
1990656
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1990656
},
"flow_head.layers.3.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.3.k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.3.v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.3.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.3.cross_q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.3.cross_k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.3.cross_v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.3.cross_o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.3.mlp.0.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.3.mlp.2.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.4.adaln.adaLN_modulation.1.weight": {
"shape": [
1990656
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1990656
},
"flow_head.layers.4.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.4.k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.4.v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.4.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.4.cross_q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.4.cross_k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.4.cross_v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.4.cross_o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.4.mlp.0.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.4.mlp.2.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.5.adaln.adaLN_modulation.1.weight": {
"shape": [
1990656
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1990656
},
"flow_head.layers.5.q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.5.k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.5.v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.5.o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.5.cross_q_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.5.cross_k_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.5.cross_v_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.5.cross_o_proj.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"flow_head.layers.5.mlp.0.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.layers.5.mlp.2.weight": {
"shape": [
1327104
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 1327104
},
"flow_head.final_adaLN.1.weight": {
"shape": [
663552
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 663552
},
"flow_head.final_proj.weight": {
"shape": [
9216
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 9216
},
"gen_image_encoder.proj.0.weight": {
"shape": [
9216
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 9216
},
"gen_image_encoder.proj.2.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
},
"vision_projector.0.weight": {
"shape": [
7077888
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 7077888
},
"vision_projector.2.weight": {
"shape": [
331776
],
"block_size": 64,
"dtype": "nf4",
"n_elements": 331776
}
}