{ "metadata": { "total_size": 376368524 }, "weight_map": { "connector.fc1.bias": "ema.safetensors", "connector.fc1.weight": "ema.safetensors", "connector.fc2.bias": "ema.safetensors", "connector.fc2.weight": "ema.safetensors", "language_model.lm_head.weight": "ema.safetensors", "language_model.model.embed_tokens.weight": "ema.safetensors", "language_model.model.layers.0.input_layernorm.weight": "ema.safetensors", "language_model.model.layers.0.input_layernorm_moe_gen.weight": "ema.safetensors", "language_model.model.layers.0.mlp.down_proj.weight": "ema.safetensors", "language_model.model.layers.0.mlp.gate_proj.weight": "ema.safetensors", "language_model.model.layers.0.mlp.up_proj.weight": "ema.safetensors", "language_model.model.layers.0.mlp_moe_gen.down_proj.weight": "ema.safetensors", "language_model.model.layers.0.mlp_moe_gen.gate_proj.weight": "ema.safetensors", "language_model.model.layers.0.mlp_moe_gen.up_proj.weight": "ema.safetensors", "language_model.model.layers.0.post_attention_layernorm.weight": "ema.safetensors", "language_model.model.layers.0.post_attention_layernorm_moe_gen.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.k_norm.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.k_norm_moe_gen.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.k_proj.bias": "ema.safetensors", "language_model.model.layers.0.self_attn.k_proj.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.k_proj_moe_gen.bias": "ema.safetensors", "language_model.model.layers.0.self_attn.k_proj_moe_gen.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.o_proj.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.o_proj_moe_gen.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.q_norm.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.q_norm_moe_gen.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.q_proj.bias": "ema.safetensors", "language_model.model.layers.0.self_attn.q_proj.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.q_proj_moe_gen.bias": "ema.safetensors", "language_model.model.layers.0.self_attn.q_proj_moe_gen.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.v_proj.bias": "ema.safetensors", "language_model.model.layers.0.self_attn.v_proj.weight": "ema.safetensors", "language_model.model.layers.0.self_attn.v_proj_moe_gen.bias": "ema.safetensors", "language_model.model.layers.0.self_attn.v_proj_moe_gen.weight": "ema.safetensors", "language_model.model.norm.weight": "ema.safetensors", "language_model.model.norm_moe_gen.weight": "ema.safetensors", "latent_pos_embed.pos_embed": "ema.safetensors", "llm2vae.bias": "ema.safetensors", "llm2vae.weight": "ema.safetensors", "time_embedder.mlp.0.bias": "ema.safetensors", "time_embedder.mlp.0.weight": "ema.safetensors", "time_embedder.mlp.2.bias": "ema.safetensors", "time_embedder.mlp.2.weight": "ema.safetensors", "vae2llm.bias": "ema.safetensors", "vae2llm.weight": "ema.safetensors", "vit_model.vision_model.embeddings.patch_embedding.bias": "ema.safetensors", "vit_model.vision_model.embeddings.patch_embedding.weight": "ema.safetensors", "vit_model.vision_model.embeddings.position_embedding.weight": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.layer_norm1.bias": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.layer_norm1.weight": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.layer_norm2.bias": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.layer_norm2.weight": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.mlp.fc1.bias": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.mlp.fc1.weight": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.mlp.fc2.bias": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.mlp.fc2.weight": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.self_attn.k_proj.bias": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.self_attn.k_proj.weight": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.self_attn.out_proj.bias": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.self_attn.out_proj.weight": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.self_attn.q_proj.bias": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.self_attn.q_proj.weight": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.self_attn.v_proj.bias": "ema.safetensors", "vit_model.vision_model.encoder.layers.0.self_attn.v_proj.weight": "ema.safetensors", "vit_model.vision_model.post_layernorm.bias": "ema.safetensors", "vit_model.vision_model.post_layernorm.weight": "ema.safetensors", "vit_pos_embed.pos_embed": "ema.safetensors", "decoder.conv_in.bias": "ae.safetensors", "decoder.conv_in.weight": "ae.safetensors", "decoder.conv_out.bias": "ae.safetensors", "decoder.conv_out.weight": "ae.safetensors", "decoder.mid.attn_1.k.bias": "ae.safetensors", "decoder.mid.attn_1.k.weight": "ae.safetensors", "decoder.mid.attn_1.norm.bias": "ae.safetensors", "decoder.mid.attn_1.norm.weight": "ae.safetensors", "decoder.mid.attn_1.proj_out.bias": "ae.safetensors", "decoder.mid.attn_1.proj_out.weight": "ae.safetensors", "decoder.mid.attn_1.q.bias": "ae.safetensors", "decoder.mid.attn_1.q.weight": "ae.safetensors", "decoder.mid.attn_1.v.bias": "ae.safetensors", "decoder.mid.attn_1.v.weight": "ae.safetensors", "decoder.mid.block_1.conv1.bias": "ae.safetensors", "decoder.mid.block_1.conv1.weight": "ae.safetensors", "decoder.mid.block_1.conv2.bias": "ae.safetensors", "decoder.mid.block_1.conv2.weight": "ae.safetensors", "decoder.mid.block_1.norm1.bias": "ae.safetensors", "decoder.mid.block_1.norm1.weight": "ae.safetensors", "decoder.mid.block_1.norm2.bias": "ae.safetensors", "decoder.mid.block_1.norm2.weight": "ae.safetensors", "decoder.mid.block_2.conv1.bias": "ae.safetensors", "decoder.mid.block_2.conv1.weight": "ae.safetensors", "decoder.mid.block_2.conv2.bias": "ae.safetensors", "decoder.mid.block_2.conv2.weight": "ae.safetensors", "decoder.mid.block_2.norm1.bias": "ae.safetensors", "decoder.mid.block_2.norm1.weight": "ae.safetensors", "decoder.mid.block_2.norm2.bias": "ae.safetensors", "decoder.mid.block_2.norm2.weight": "ae.safetensors", "decoder.norm_out.bias": "ae.safetensors", "decoder.norm_out.weight": "ae.safetensors", "decoder.up.0.block.0.conv1.bias": "ae.safetensors", "decoder.up.0.block.0.conv1.weight": "ae.safetensors", "decoder.up.0.block.0.conv2.bias": "ae.safetensors", "decoder.up.0.block.0.conv2.weight": "ae.safetensors", "decoder.up.0.block.0.nin_shortcut.bias": "ae.safetensors", "decoder.up.0.block.0.nin_shortcut.weight": "ae.safetensors", "decoder.up.0.block.0.norm1.bias": "ae.safetensors", "decoder.up.0.block.0.norm1.weight": "ae.safetensors", "decoder.up.0.block.0.norm2.bias": "ae.safetensors", "decoder.up.0.block.0.norm2.weight": "ae.safetensors", "decoder.up.0.block.1.conv1.bias": "ae.safetensors", "decoder.up.0.block.1.conv1.weight": "ae.safetensors", "decoder.up.0.block.1.conv2.bias": "ae.safetensors", "decoder.up.0.block.1.conv2.weight": "ae.safetensors", "decoder.up.0.block.1.norm1.bias": "ae.safetensors", "decoder.up.0.block.1.norm1.weight": "ae.safetensors", "decoder.up.0.block.1.norm2.bias": "ae.safetensors", "decoder.up.0.block.1.norm2.weight": "ae.safetensors", "decoder.up.0.block.2.conv1.bias": "ae.safetensors", "decoder.up.0.block.2.conv1.weight": "ae.safetensors", "decoder.up.0.block.2.conv2.bias": "ae.safetensors", "decoder.up.0.block.2.conv2.weight": "ae.safetensors", "decoder.up.0.block.2.norm1.bias": "ae.safetensors", "decoder.up.0.block.2.norm1.weight": "ae.safetensors", "decoder.up.0.block.2.norm2.bias": "ae.safetensors", "decoder.up.0.block.2.norm2.weight": "ae.safetensors", "decoder.up.1.block.0.conv1.bias": "ae.safetensors", "decoder.up.1.block.0.conv1.weight": "ae.safetensors", "decoder.up.1.block.0.conv2.bias": "ae.safetensors", "decoder.up.1.block.0.conv2.weight": "ae.safetensors", "decoder.up.1.block.0.nin_shortcut.bias": "ae.safetensors", "decoder.up.1.block.0.nin_shortcut.weight": "ae.safetensors", "decoder.up.1.block.0.norm1.bias": "ae.safetensors", "decoder.up.1.block.0.norm1.weight": "ae.safetensors", "decoder.up.1.block.0.norm2.bias": "ae.safetensors", "decoder.up.1.block.0.norm2.weight": "ae.safetensors", "decoder.up.1.block.1.conv1.bias": "ae.safetensors", "decoder.up.1.block.1.conv1.weight": "ae.safetensors", "decoder.up.1.block.1.conv2.bias": "ae.safetensors", "decoder.up.1.block.1.conv2.weight": "ae.safetensors", "decoder.up.1.block.1.norm1.bias": "ae.safetensors", "decoder.up.1.block.1.norm1.weight": "ae.safetensors", "decoder.up.1.block.1.norm2.bias": "ae.safetensors", "decoder.up.1.block.1.norm2.weight": "ae.safetensors", "decoder.up.1.block.2.conv1.bias": "ae.safetensors", "decoder.up.1.block.2.conv1.weight": "ae.safetensors", "decoder.up.1.block.2.conv2.bias": "ae.safetensors", "decoder.up.1.block.2.conv2.weight": "ae.safetensors", "decoder.up.1.block.2.norm1.bias": "ae.safetensors", "decoder.up.1.block.2.norm1.weight": "ae.safetensors", "decoder.up.1.block.2.norm2.bias": "ae.safetensors", "decoder.up.1.block.2.norm2.weight": "ae.safetensors", "decoder.up.1.upsample.conv.bias": "ae.safetensors", "decoder.up.1.upsample.conv.weight": "ae.safetensors", "decoder.up.2.block.0.conv1.bias": "ae.safetensors", "decoder.up.2.block.0.conv1.weight": "ae.safetensors", "decoder.up.2.block.0.conv2.bias": "ae.safetensors", "decoder.up.2.block.0.conv2.weight": "ae.safetensors", "decoder.up.2.block.0.norm1.bias": "ae.safetensors", "decoder.up.2.block.0.norm1.weight": "ae.safetensors", "decoder.up.2.block.0.norm2.bias": "ae.safetensors", "decoder.up.2.block.0.norm2.weight": "ae.safetensors", "decoder.up.2.block.1.conv1.bias": "ae.safetensors", "decoder.up.2.block.1.conv1.weight": "ae.safetensors", "decoder.up.2.block.1.conv2.bias": "ae.safetensors", "decoder.up.2.block.1.conv2.weight": "ae.safetensors", "decoder.up.2.block.1.norm1.bias": "ae.safetensors", "decoder.up.2.block.1.norm1.weight": "ae.safetensors", "decoder.up.2.block.1.norm2.bias": "ae.safetensors", "decoder.up.2.block.1.norm2.weight": "ae.safetensors", "decoder.up.2.block.2.conv1.bias": "ae.safetensors", "decoder.up.2.block.2.conv1.weight": "ae.safetensors", "decoder.up.2.block.2.conv2.bias": "ae.safetensors", "decoder.up.2.block.2.conv2.weight": "ae.safetensors", "decoder.up.2.block.2.norm1.bias": "ae.safetensors", "decoder.up.2.block.2.norm1.weight": "ae.safetensors", "decoder.up.2.block.2.norm2.bias": "ae.safetensors", "decoder.up.2.block.2.norm2.weight": "ae.safetensors", "decoder.up.2.upsample.conv.bias": "ae.safetensors", "decoder.up.2.upsample.conv.weight": "ae.safetensors", "decoder.up.3.block.0.conv1.bias": "ae.safetensors", "decoder.up.3.block.0.conv1.weight": "ae.safetensors", "decoder.up.3.block.0.conv2.bias": "ae.safetensors", "decoder.up.3.block.0.conv2.weight": "ae.safetensors", "decoder.up.3.block.0.norm1.bias": "ae.safetensors", "decoder.up.3.block.0.norm1.weight": "ae.safetensors", "decoder.up.3.block.0.norm2.bias": "ae.safetensors", "decoder.up.3.block.0.norm2.weight": "ae.safetensors", "decoder.up.3.block.1.conv1.bias": "ae.safetensors", "decoder.up.3.block.1.conv1.weight": "ae.safetensors", "decoder.up.3.block.1.conv2.bias": "ae.safetensors", "decoder.up.3.block.1.conv2.weight": "ae.safetensors", "decoder.up.3.block.1.norm1.bias": "ae.safetensors", "decoder.up.3.block.1.norm1.weight": "ae.safetensors", "decoder.up.3.block.1.norm2.bias": "ae.safetensors", "decoder.up.3.block.1.norm2.weight": "ae.safetensors", "decoder.up.3.block.2.conv1.bias": "ae.safetensors", "decoder.up.3.block.2.conv1.weight": "ae.safetensors", "decoder.up.3.block.2.conv2.bias": "ae.safetensors", "decoder.up.3.block.2.conv2.weight": "ae.safetensors", "decoder.up.3.block.2.norm1.bias": "ae.safetensors", "decoder.up.3.block.2.norm1.weight": "ae.safetensors", "decoder.up.3.block.2.norm2.bias": "ae.safetensors", "decoder.up.3.block.2.norm2.weight": "ae.safetensors", "decoder.up.3.upsample.conv.bias": "ae.safetensors", "decoder.up.3.upsample.conv.weight": "ae.safetensors", "encoder.conv_in.bias": "ae.safetensors", "encoder.conv_in.weight": "ae.safetensors", "encoder.conv_out.bias": "ae.safetensors", "encoder.conv_out.weight": "ae.safetensors", "encoder.down.0.block.0.conv1.bias": "ae.safetensors", "encoder.down.0.block.0.conv1.weight": "ae.safetensors", "encoder.down.0.block.0.conv2.bias": "ae.safetensors", "encoder.down.0.block.0.conv2.weight": "ae.safetensors", "encoder.down.0.block.0.norm1.bias": "ae.safetensors", "encoder.down.0.block.0.norm1.weight": "ae.safetensors", "encoder.down.0.block.0.norm2.bias": "ae.safetensors", "encoder.down.0.block.0.norm2.weight": "ae.safetensors", "encoder.down.0.block.1.conv1.bias": "ae.safetensors", "encoder.down.0.block.1.conv1.weight": "ae.safetensors", "encoder.down.0.block.1.conv2.bias": "ae.safetensors", "encoder.down.0.block.1.conv2.weight": "ae.safetensors", "encoder.down.0.block.1.norm1.bias": "ae.safetensors", "encoder.down.0.block.1.norm1.weight": "ae.safetensors", "encoder.down.0.block.1.norm2.bias": "ae.safetensors", "encoder.down.0.block.1.norm2.weight": "ae.safetensors", "encoder.down.0.downsample.conv.bias": "ae.safetensors", "encoder.down.0.downsample.conv.weight": "ae.safetensors", "encoder.down.1.block.0.conv1.bias": "ae.safetensors", "encoder.down.1.block.0.conv1.weight": "ae.safetensors", "encoder.down.1.block.0.conv2.bias": "ae.safetensors", "encoder.down.1.block.0.conv2.weight": "ae.safetensors", "encoder.down.1.block.0.nin_shortcut.bias": "ae.safetensors", "encoder.down.1.block.0.nin_shortcut.weight": "ae.safetensors", "encoder.down.1.block.0.norm1.bias": "ae.safetensors", "encoder.down.1.block.0.norm1.weight": "ae.safetensors", "encoder.down.1.block.0.norm2.bias": "ae.safetensors", "encoder.down.1.block.0.norm2.weight": "ae.safetensors", "encoder.down.1.block.1.conv1.bias": "ae.safetensors", "encoder.down.1.block.1.conv1.weight": "ae.safetensors", "encoder.down.1.block.1.conv2.bias": "ae.safetensors", "encoder.down.1.block.1.conv2.weight": "ae.safetensors", "encoder.down.1.block.1.norm1.bias": "ae.safetensors", "encoder.down.1.block.1.norm1.weight": "ae.safetensors", "encoder.down.1.block.1.norm2.bias": "ae.safetensors", "encoder.down.1.block.1.norm2.weight": "ae.safetensors", "encoder.down.1.downsample.conv.bias": "ae.safetensors", "encoder.down.1.downsample.conv.weight": "ae.safetensors", "encoder.down.2.block.0.conv1.bias": "ae.safetensors", "encoder.down.2.block.0.conv1.weight": "ae.safetensors", "encoder.down.2.block.0.conv2.bias": "ae.safetensors", "encoder.down.2.block.0.conv2.weight": "ae.safetensors", "encoder.down.2.block.0.nin_shortcut.bias": "ae.safetensors", "encoder.down.2.block.0.nin_shortcut.weight": "ae.safetensors", "encoder.down.2.block.0.norm1.bias": "ae.safetensors", "encoder.down.2.block.0.norm1.weight": "ae.safetensors", "encoder.down.2.block.0.norm2.bias": "ae.safetensors", "encoder.down.2.block.0.norm2.weight": "ae.safetensors", "encoder.down.2.block.1.conv1.bias": "ae.safetensors", "encoder.down.2.block.1.conv1.weight": "ae.safetensors", "encoder.down.2.block.1.conv2.bias": "ae.safetensors", "encoder.down.2.block.1.conv2.weight": "ae.safetensors", "encoder.down.2.block.1.norm1.bias": "ae.safetensors", "encoder.down.2.block.1.norm1.weight": "ae.safetensors", "encoder.down.2.block.1.norm2.bias": "ae.safetensors", "encoder.down.2.block.1.norm2.weight": "ae.safetensors", "encoder.down.2.downsample.conv.bias": "ae.safetensors", "encoder.down.2.downsample.conv.weight": "ae.safetensors", "encoder.down.3.block.0.conv1.bias": "ae.safetensors", "encoder.down.3.block.0.conv1.weight": "ae.safetensors", "encoder.down.3.block.0.conv2.bias": "ae.safetensors", "encoder.down.3.block.0.conv2.weight": "ae.safetensors", "encoder.down.3.block.0.norm1.bias": "ae.safetensors", "encoder.down.3.block.0.norm1.weight": "ae.safetensors", "encoder.down.3.block.0.norm2.bias": "ae.safetensors", "encoder.down.3.block.0.norm2.weight": "ae.safetensors", "encoder.down.3.block.1.conv1.bias": "ae.safetensors", "encoder.down.3.block.1.conv1.weight": "ae.safetensors", "encoder.down.3.block.1.conv2.bias": "ae.safetensors", "encoder.down.3.block.1.conv2.weight": "ae.safetensors", "encoder.down.3.block.1.norm1.bias": "ae.safetensors", "encoder.down.3.block.1.norm1.weight": "ae.safetensors", "encoder.down.3.block.1.norm2.bias": "ae.safetensors", "encoder.down.3.block.1.norm2.weight": "ae.safetensors", "encoder.mid.attn_1.k.bias": "ae.safetensors", "encoder.mid.attn_1.k.weight": "ae.safetensors", "encoder.mid.attn_1.norm.bias": "ae.safetensors", "encoder.mid.attn_1.norm.weight": "ae.safetensors", "encoder.mid.attn_1.proj_out.bias": "ae.safetensors", "encoder.mid.attn_1.proj_out.weight": "ae.safetensors", "encoder.mid.attn_1.q.bias": "ae.safetensors", "encoder.mid.attn_1.q.weight": "ae.safetensors", "encoder.mid.attn_1.v.bias": "ae.safetensors", "encoder.mid.attn_1.v.weight": "ae.safetensors", "encoder.mid.block_1.conv1.bias": "ae.safetensors", "encoder.mid.block_1.conv1.weight": "ae.safetensors", "encoder.mid.block_1.conv2.bias": "ae.safetensors", "encoder.mid.block_1.conv2.weight": "ae.safetensors", "encoder.mid.block_1.norm1.bias": "ae.safetensors", "encoder.mid.block_1.norm1.weight": "ae.safetensors", "encoder.mid.block_1.norm2.bias": "ae.safetensors", "encoder.mid.block_1.norm2.weight": "ae.safetensors", "encoder.mid.block_2.conv1.bias": "ae.safetensors", "encoder.mid.block_2.conv1.weight": "ae.safetensors", "encoder.mid.block_2.conv2.bias": "ae.safetensors", "encoder.mid.block_2.conv2.weight": "ae.safetensors", "encoder.mid.block_2.norm1.bias": "ae.safetensors", "encoder.mid.block_2.norm1.weight": "ae.safetensors", "encoder.mid.block_2.norm2.bias": "ae.safetensors", "encoder.mid.block_2.norm2.weight": "ae.safetensors", "encoder.norm_out.bias": "ae.safetensors", "encoder.norm_out.weight": "ae.safetensors" } }