(sigma_vla) root@C.28189995:/workspace$ cd /workspace && \ for SEED in 1 2 3 4 5 6 7; do echo "===== START CONTROL SEED ${SEED} =====" python /workspace/eval_sigma_vla_rollout.py \ --base_model_id "lerobot/pi05_base" \ --tokenizer_id "google/paligemma-3b-pt-224" \ --artifacts_repo_id "Veltraxor/Sigma" \ --output_dir "/workspace/storage/sigma_eval_out_control_seed${SEED}" \ --batch_size 4 \ --num_workers 2 \ --dtype bf16 \ --disable_telepathy \ --seed ${SEED} echo "===== END CONTROL SEED ${SEED} =====" done ===== START CONTROL SEED 1 ===== /venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( Fetching 6 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 5423.67it/s] [INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace [INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt /venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so') WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. [policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback): No module named 'triton.ops' The PI05 model is a direct port of the OpenPI implementation. This implementation follows the original OpenPI structure for compatibility. Original implementation: https://github.com/Physical-Intelligence/openpi WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. /venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead. warnings.warn( WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py) Loading model from: lerobot/pi05_base ✓ Loaded state dict from model.safetensors WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight Remapped: action_in_proj.bias -> model.action_in_proj.bias Remapped: action_in_proj.weight -> model.action_in_proj.weight Remapped: action_out_proj.bias -> model.action_out_proj.bias Remapped: action_out_proj.weight -> model.action_out_proj.weight Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight Remapped 812 state dict keys Warning: Could not remap state dict keys: Error(s) in loading state_dict for PI05Policy: Missing key(s) in state_dict: "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.language_model.model.embed_tokens.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.norm.weight", "model.paligemma_with_expert.paligemma.language_model.lm_head.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.norm.weight". Unexpected key(s) in state_dict: "model.paligemma_with_expert.paligemma.lm_head.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.norm.weight", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.norm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.norm.dense.weight". /venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True warnings.warn( [CHECK-A] disable_telepathy=True [CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB [CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970 [CHECK-A] heads fully matched (no missing/unexpected). [INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt'] [CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000 batch=0 mse_vec=120.3846 mse_chk=329.0237 mse_trj=273.8578 tau_l2=51.5918 sem_align=0.0451 batch=20 mse_vec=118.0607 mse_chk=199.8564 mse_trj=170.4615 tau_l2=51.5975 sem_align=0.0451 batch=40 mse_vec=104.9334 mse_chk=267.9076 mse_trj=232.3708 tau_l2=51.5960 sem_align=0.0451 batch=60 mse_vec=88.2954 mse_chk=240.8833 mse_trj=204.7726 tau_l2=51.5978 sem_align=0.0451 batch=80 mse_vec=111.9476 mse_chk=184.6584 mse_trj=160.4074 tau_l2=51.5981 sem_align=0.0451 batch=100 mse_vec=92.7796 mse_chk=237.7554 mse_trj=207.4145 tau_l2=51.5984 sem_align=0.0451 batch=120 mse_vec=124.7822 mse_chk=337.7815 mse_trj=274.7615 tau_l2=51.5928 sem_align=0.0451 batch=140 mse_vec=166.1161 mse_chk=275.2061 mse_trj=227.3151 tau_l2=51.5890 sem_align=0.0451 batch=160 mse_vec=130.3206 mse_chk=330.5715 mse_trj=277.0004 tau_l2=51.5919 sem_align=0.0451 batch=180 mse_vec=64.9396 mse_chk=188.2773 mse_trj=165.8396 tau_l2=51.6011 sem_align=0.0451 [DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.83126428509286, 'avg_mse_chunk': 228.9725721159034, 'avg_mse_traj': 191.0328658931163, 'avg_tau_l2': 51.598266706940876, 'avg_semantic_text_alignment': 0.0451381394011869, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87814035587127, 'avg_hard_mse_chunk': 229.0288578102038, 'avg_hard_mse_traj': 191.06770991619527, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} ===== END CONTROL SEED 1 ===== ===== START CONTROL SEED 2 ===== /venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( Fetching 6 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 2976.09it/s] [INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace [INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt /venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so') WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. [policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback): No module named 'triton.ops' The PI05 model is a direct port of the OpenPI implementation. This implementation follows the original OpenPI structure for compatibility. Original implementation: https://github.com/Physical-Intelligence/openpi WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. /venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead. warnings.warn( WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py) Loading model from: lerobot/pi05_base ✓ Loaded state dict from model.safetensors WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight Remapped: action_in_proj.bias -> model.action_in_proj.bias Remapped: action_in_proj.weight -> model.action_in_proj.weight Remapped: action_out_proj.bias -> model.action_out_proj.bias Remapped: action_out_proj.weight -> model.action_out_proj.weight Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight Remapped 812 state dict keys Warning: Could not remap state dict keys: Error(s) in loading state_dict for PI05Policy: Missing key(s) in state_dict: "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.language_model.model.embed_tokens.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.norm.weight", "model.paligemma_with_expert.paligemma.language_model.lm_head.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.norm.weight". Unexpected key(s) in state_dict: "model.paligemma_with_expert.paligemma.lm_head.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.norm.weight", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.norm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.norm.dense.weight". /venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True warnings.warn( [CHECK-A] disable_telepathy=True [CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB [CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970 [CHECK-A] heads fully matched (no missing/unexpected). [INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt'] [CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000 batch=0 mse_vec=120.3840 mse_chk=329.0235 mse_trj=273.8575 tau_l2=51.5921 sem_align=0.0730 batch=20 mse_vec=118.0604 mse_chk=199.8563 mse_trj=170.4615 tau_l2=51.5978 sem_align=0.0729 batch=40 mse_vec=104.9329 mse_chk=267.9074 mse_trj=232.3706 tau_l2=51.5963 sem_align=0.0730 batch=60 mse_vec=88.2950 mse_chk=240.8832 mse_trj=204.7724 tau_l2=51.5981 sem_align=0.0730 batch=80 mse_vec=111.9474 mse_chk=184.6583 mse_trj=160.4073 tau_l2=51.5984 sem_align=0.0729 batch=100 mse_vec=92.7791 mse_chk=237.7552 mse_trj=207.4143 tau_l2=51.5986 sem_align=0.0730 batch=120 mse_vec=124.7818 mse_chk=337.7813 mse_trj=274.7612 tau_l2=51.5931 sem_align=0.0730 batch=140 mse_vec=166.1163 mse_chk=275.2060 mse_trj=227.3149 tau_l2=51.5895 sem_align=0.0729 batch=160 mse_vec=130.3201 mse_chk=330.5714 mse_trj=277.0001 tau_l2=51.5922 sem_align=0.0730 batch=180 mse_vec=64.9392 mse_chk=188.2772 mse_trj=165.8394 tau_l2=51.6014 sem_align=0.0729 [DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.83100062839235, 'avg_mse_chunk': 228.9724379060018, 'avg_mse_traj': 191.03270809953383, 'avg_tau_l2': 51.59860541412185, 'avg_semantic_text_alignment': 0.07294852228948424, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87787737918923, 'avg_hard_mse_chunk': 229.0287251874784, 'avg_hard_mse_traj': 191.06755317065395, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} ===== END CONTROL SEED 2 ===== ===== START CONTROL SEED 3 ===== /venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( Fetching 6 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 7362.73it/s] [INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace [INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt /venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so') WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. [policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback): No module named 'triton.ops' The PI05 model is a direct port of the OpenPI implementation. This implementation follows the original OpenPI structure for compatibility. Original implementation: https://github.com/Physical-Intelligence/openpi WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. /venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead. warnings.warn( WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py) Loading model from: lerobot/pi05_base ✓ Loaded state dict from model.safetensors WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight Remapped: action_in_proj.bias -> model.action_in_proj.bias Remapped: action_in_proj.weight -> model.action_in_proj.weight Remapped: action_out_proj.bias -> model.action_out_proj.bias Remapped: action_out_proj.weight -> model.action_out_proj.weight Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight Remapped 812 state dict keys Warning: Could not remap state dict keys: Error(s) in loading state_dict for PI05Policy: Missing key(s) in state_dict: "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.language_model.model.embed_tokens.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.norm.weight", "model.paligemma_with_expert.paligemma.language_model.lm_head.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.norm.weight". Unexpected key(s) in state_dict: "model.paligemma_with_expert.paligemma.lm_head.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.norm.weight", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.norm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.norm.dense.weight". /venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True warnings.warn( [CHECK-A] disable_telepathy=True [CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB [CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970 [CHECK-A] heads fully matched (no missing/unexpected). [INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt'] [CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000 batch=0 mse_vec=120.3840 mse_chk=329.0236 mse_trj=273.8575 tau_l2=51.5922 sem_align=0.0212 batch=20 mse_vec=118.0602 mse_chk=199.8564 mse_trj=170.4615 tau_l2=51.5977 sem_align=0.0212 batch=40 mse_vec=104.9329 mse_chk=267.9075 mse_trj=232.3706 tau_l2=51.5964 sem_align=0.0212 batch=60 mse_vec=88.2949 mse_chk=240.8833 mse_trj=204.7724 tau_l2=51.5981 sem_align=0.0212 batch=80 mse_vec=111.9472 mse_chk=184.6584 mse_trj=160.4073 tau_l2=51.5983 sem_align=0.0212 batch=100 mse_vec=92.7790 mse_chk=237.7553 mse_trj=207.4142 tau_l2=51.5988 sem_align=0.0212 batch=120 mse_vec=124.7818 mse_chk=337.7814 mse_trj=274.7611 tau_l2=51.5932 sem_align=0.0212 batch=140 mse_vec=166.1163 mse_chk=275.2059 mse_trj=227.3147 tau_l2=51.5897 sem_align=0.0212 batch=160 mse_vec=130.3201 mse_chk=330.5714 mse_trj=277.0001 tau_l2=51.5924 sem_align=0.0212 batch=180 mse_vec=64.9391 mse_chk=188.2772 mse_trj=165.8393 tau_l2=51.6017 sem_align=0.0212 [DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.83093210083345, 'avg_mse_chunk': 228.9724947681743, 'avg_mse_traj': 191.03266843511256, 'avg_tau_l2': 51.59865496303495, 'avg_semantic_text_alignment': 0.02121743619524313, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87780978043214, 'avg_hard_mse_chunk': 229.02878331016868, 'avg_hard_mse_traj': 191.06751537191093, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} ===== END CONTROL SEED 3 ===== ===== START CONTROL SEED 4 ===== /venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( Fetching 6 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 6363.04it/s] [INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace [INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt /venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so') WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. [policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback): No module named 'triton.ops' The PI05 model is a direct port of the OpenPI implementation. This implementation follows the original OpenPI structure for compatibility. Original implementation: https://github.com/Physical-Intelligence/openpi WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. /venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead. warnings.warn( WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py) Loading model from: lerobot/pi05_base ✓ Loaded state dict from model.safetensors WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight Remapped: action_in_proj.bias -> model.action_in_proj.bias Remapped: action_in_proj.weight -> model.action_in_proj.weight Remapped: action_out_proj.bias -> model.action_out_proj.bias Remapped: action_out_proj.weight -> model.action_out_proj.weight Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight Remapped 812 state dict keys Warning: Could not remap state dict keys: Error(s) in loading state_dict for PI05Policy: Missing key(s) in state_dict: "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.language_model.model.embed_tokens.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.norm.weight", "model.paligemma_with_expert.paligemma.language_model.lm_head.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.norm.weight". Unexpected key(s) in state_dict: "model.paligemma_with_expert.paligemma.lm_head.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.norm.weight", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.norm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.norm.dense.weight". /venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True warnings.warn( [CHECK-A] disable_telepathy=True [CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB [CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970 [CHECK-A] heads fully matched (no missing/unexpected). [INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt'] [CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000 batch=0 mse_vec=120.3866 mse_chk=329.0249 mse_trj=273.8585 tau_l2=51.5898 sem_align=-0.0202 batch=20 mse_vec=118.0609 mse_chk=199.8568 mse_trj=170.4618 tau_l2=51.5957 sem_align=-0.0202 batch=40 mse_vec=104.9354 mse_chk=267.9086 mse_trj=232.3715 tau_l2=51.5940 sem_align=-0.0202 batch=60 mse_vec=88.2968 mse_chk=240.8841 mse_trj=204.7731 tau_l2=51.5958 sem_align=-0.0202 batch=80 mse_vec=111.9479 mse_chk=184.6587 mse_trj=160.4077 tau_l2=51.5964 sem_align=-0.0202 batch=100 mse_vec=92.7812 mse_chk=237.7563 mse_trj=207.4150 tau_l2=51.5964 sem_align=-0.0202 batch=120 mse_vec=124.7844 mse_chk=337.7830 mse_trj=274.7623 tau_l2=51.5905 sem_align=-0.0202 batch=140 mse_vec=166.1174 mse_chk=275.2077 mse_trj=227.3160 tau_l2=51.5863 sem_align=-0.0201 batch=160 mse_vec=130.3228 mse_chk=330.5728 mse_trj=277.0011 tau_l2=51.5899 sem_align=-0.0202 batch=180 mse_vec=64.9409 mse_chk=188.2781 mse_trj=165.8401 tau_l2=51.5990 sem_align=-0.0202 [DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.8321500535828, 'avg_mse_chunk': 228.97341872578826, 'avg_mse_traj': 191.0333878954471, 'avg_tau_l2': 51.59625214634679, 'avg_semantic_text_alignment': -0.020161717872840264, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87902538674172, 'avg_hard_mse_chunk': 229.02970386046098, 'avg_hard_mse_traj': 191.06823270383546, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} ===== END CONTROL SEED 4 ===== ===== START CONTROL SEED 5 ===== /venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( Fetching 6 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 2589.34it/s] [INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace [INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt /venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so') WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. [policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback): No module named 'triton.ops' The PI05 model is a direct port of the OpenPI implementation. This implementation follows the original OpenPI structure for compatibility. Original implementation: https://github.com/Physical-Intelligence/openpi WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. /venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead. warnings.warn( WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py) Loading model from: lerobot/pi05_base ✓ Loaded state dict from model.safetensors WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight Remapped: action_in_proj.bias -> model.action_in_proj.bias Remapped: action_in_proj.weight -> model.action_in_proj.weight Remapped: action_out_proj.bias -> model.action_out_proj.bias Remapped: action_out_proj.weight -> model.action_out_proj.weight Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight Remapped 812 state dict keys Warning: Could not remap state dict keys: Error(s) in loading state_dict for PI05Policy: Missing key(s) in state_dict: "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.language_model.model.embed_tokens.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.norm.weight", "model.paligemma_with_expert.paligemma.language_model.lm_head.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.norm.weight". Unexpected key(s) in state_dict: "model.paligemma_with_expert.paligemma.lm_head.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.norm.weight", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.norm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.norm.dense.weight". /venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True warnings.warn( [CHECK-A] disable_telepathy=True [CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB [CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970 [CHECK-A] heads fully matched (no missing/unexpected). [INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt'] [CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000 batch=0 mse_vec=120.3857 mse_chk=329.0244 mse_trj=273.8579 tau_l2=51.5911 sem_align=-0.0170 batch=20 mse_vec=118.0609 mse_chk=199.8568 mse_trj=170.4618 tau_l2=51.5964 sem_align=-0.0170 batch=40 mse_vec=104.9344 mse_chk=267.9082 mse_trj=232.3710 tau_l2=51.5953 sem_align=-0.0170 batch=60 mse_vec=88.2962 mse_chk=240.8838 mse_trj=204.7728 tau_l2=51.5970 sem_align=-0.0170 batch=80 mse_vec=111.9478 mse_chk=184.6587 mse_trj=160.4076 tau_l2=51.5970 sem_align=-0.0170 batch=100 mse_vec=92.7805 mse_chk=237.7560 mse_trj=207.4146 tau_l2=51.5976 sem_align=-0.0170 batch=120 mse_vec=124.7830 mse_chk=337.7822 mse_trj=274.7616 tau_l2=51.5923 sem_align=-0.0170 batch=140 mse_vec=166.1162 mse_chk=275.2066 mse_trj=227.3150 tau_l2=51.5889 sem_align=-0.0170 batch=160 mse_vec=130.3217 mse_chk=330.5722 mse_trj=277.0005 tau_l2=51.5913 sem_align=-0.0170 batch=180 mse_vec=64.9402 mse_chk=188.2778 mse_trj=165.8397 tau_l2=51.6004 sem_align=-0.0170 [DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.83164940091127, 'avg_mse_chunk': 228.97311384506648, 'avg_mse_traj': 191.0330102614935, 'avg_tau_l2': 51.597501175179666, 'avg_semantic_text_alignment': -0.017000084187197423, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87852552949475, 'avg_hard_mse_chunk': 229.0294006263237, 'avg_hard_mse_traj': 191.06785520181617, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} ===== END CONTROL SEED 5 ===== ===== START CONTROL SEED 6 ===== /venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( Fetching 6 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 3496.22it/s] [INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace [INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt /venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so') WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. [policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback): No module named 'triton.ops' The PI05 model is a direct port of the OpenPI implementation. This implementation follows the original OpenPI structure for compatibility. Original implementation: https://github.com/Physical-Intelligence/openpi WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. /venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead. warnings.warn( WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py) Loading model from: lerobot/pi05_base ✓ Loaded state dict from model.safetensors WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight Remapped: action_in_proj.bias -> model.action_in_proj.bias Remapped: action_in_proj.weight -> model.action_in_proj.weight Remapped: action_out_proj.bias -> model.action_out_proj.bias Remapped: action_out_proj.weight -> model.action_out_proj.weight Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight Remapped 812 state dict keys Warning: Could not remap state dict keys: Error(s) in loading state_dict for PI05Policy: Missing key(s) in state_dict: "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.language_model.model.embed_tokens.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.norm.weight", "model.paligemma_with_expert.paligemma.language_model.lm_head.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.norm.weight". Unexpected key(s) in state_dict: "model.paligemma_with_expert.paligemma.lm_head.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.norm.weight", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.norm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.norm.dense.weight". /venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True warnings.warn( [CHECK-A] disable_telepathy=True [CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB [CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970 [CHECK-A] heads fully matched (no missing/unexpected). [INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt'] [CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000 batch=0 mse_vec=120.3854 mse_chk=329.0242 mse_trj=273.8579 tau_l2=51.5913 sem_align=-0.0546 batch=20 mse_vec=118.0606 mse_chk=199.8566 mse_trj=170.4617 tau_l2=51.5968 sem_align=-0.0546 batch=40 mse_vec=104.9342 mse_chk=267.9080 mse_trj=232.3710 tau_l2=51.5954 sem_align=-0.0546 batch=60 mse_vec=88.2959 mse_chk=240.8837 mse_trj=204.7728 tau_l2=51.5972 sem_align=-0.0546 batch=80 mse_vec=111.9475 mse_chk=184.6586 mse_trj=160.4075 tau_l2=51.5975 sem_align=-0.0546 batch=100 mse_vec=92.7802 mse_chk=237.7557 mse_trj=207.4146 tau_l2=51.5978 sem_align=-0.0546 batch=120 mse_vec=124.7831 mse_chk=337.7821 mse_trj=274.7617 tau_l2=51.5921 sem_align=-0.0546 batch=140 mse_vec=166.1165 mse_chk=275.2068 mse_trj=227.3152 tau_l2=51.5883 sem_align=-0.0546 batch=160 mse_vec=130.3215 mse_chk=330.5720 mse_trj=277.0005 tau_l2=51.5914 sem_align=-0.0546 batch=180 mse_vec=64.9400 mse_chk=188.2776 mse_trj=165.8397 tau_l2=51.6006 sem_align=-0.0546 [DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.83151434798268, 'avg_mse_chunk': 228.97295358562997, 'avg_mse_traj': 191.03301715323937, 'avg_tau_l2': 51.59761717569762, 'avg_semantic_text_alignment': -0.05463091368421665, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87839063800057, 'avg_hard_mse_chunk': 229.02924052511509, 'avg_hard_mse_traj': 191.06786396031887, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} ===== END CONTROL SEED 6 ===== ===== START CONTROL SEED 7 ===== /venv/sigma_vla/lib/python3.10/site-packages/huggingface_hub/file_download.py:982: UserWarning: `local_dir_use_symlinks` parameter is deprecated and will be ignored. The process to download files to a local folder has been updated and do not rely on symlinks anymore. You only need to pass a destination folder as`local_dir`. For more details, check out https://huggingface.co/docs/huggingface_hub/main/en/guides/download#download-files-to-local-folder. warnings.warn( Fetching 6 files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:00<00:00, 2122.80it/s] [INFO] Using cached shard_dir: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace [INFO] Using cached telepathy_heads_path: /workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt /venv/sigma_vla/lib/python3.10/site-packages/transformers/utils/hub.py:127: FutureWarning: Using `TRANSFORMERS_CACHE` is deprecated and will be removed in v5 of Transformers. Use `HF_HOME` instead. warnings.warn( WARNING:bitsandbytes.cextension:Could not find the bitsandbytes CUDA binary at PosixPath('/venv/sigma_vla/lib/python3.10/site-packages/bitsandbytes/libbitsandbytes_cuda126.so') WARNING:bitsandbytes.cextension:The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable. [policies_init] WARNING: optional groot deps missing: Failed to import diffusers.models.modeling_utils because of the following error (look up to see its traceback): No module named 'triton.ops' The PI05 model is a direct port of the OpenPI implementation. This implementation follows the original OpenPI structure for compatibility. Original implementation: https://github.com/Physical-Intelligence/openpi WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. WARNING:lerobot.configs.policies:Device 'mps' is not available. Switching to 'cuda'. /venv/sigma_vla/lib/python3.10/site-packages/transformers/models/paligemma/configuration_paligemma.py:137: FutureWarning: The `vocab_size` attribute is deprecated and will be removed in v4.44, Please use `text_config.vocab_size` instead. warnings.warn( WARNING:root:[patch_pi05] Could not run transformers version guard (An incorrect transformer version is used, please create an issue on https://github.com/huggingface/lerobot/issues). Continuing without strict transformers check. cannot import name 'check' from 'transformers.models.siglip' (/venv/sigma_vla/lib/python3.10/site-packages/transformers/models/siglip/__init__.py) Loading model from: lerobot/pi05_base ✓ Loaded state dict from model.safetensors WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias WARNING:root:Vision embedding key might need handling: paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight Remapped: action_in_proj.bias -> model.action_in_proj.bias Remapped: action_in_proj.weight -> model.action_in_proj.weight Remapped: action_out_proj.bias -> model.action_out_proj.bias Remapped: action_out_proj.weight -> model.action_out_proj.weight Remapped: paligemma_with_expert.gemma_expert.lm_head.weight -> model.paligemma_with_expert.gemma_expert.lm_head.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias Remapped: paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.down_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.gate_proj.weight Remapped: paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight -> model.paligemma_with_expert.gemma_expert.model.layers.0.mlp.up_proj.weight Remapped 812 state dict keys Warning: Could not remap state dict keys: Error(s) in loading state_dict for PI05Policy: Missing key(s) in state_dict: "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.paligemma.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.language_model.model.embed_tokens.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.language_model.model.norm.weight", "model.paligemma_with_expert.paligemma.language_model.lm_head.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.norm.weight". Unexpected key(s) in state_dict: "model.paligemma_with_expert.paligemma.lm_head.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.input_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.down_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.gate_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.mlp.up_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.post_attention_layernorm.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.o_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.language_model.norm.weight", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.bias", "model.paligemma_with_expert.paligemma.model.multi_modal_projector.linear.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.patch_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.embeddings.position_embedding.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.0.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.1.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.10.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.11.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.12.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.13.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.14.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.15.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.16.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.17.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.18.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.19.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.2.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.20.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.21.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.22.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.23.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.24.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.25.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.26.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.3.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.4.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.5.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.6.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.7.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.8.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.layer_norm2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc1.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.mlp.fc2.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.k_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.out_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.q_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.encoder.layers.9.self_attn.v_proj.weight", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.bias", "model.paligemma_with_expert.paligemma.model.vision_tower.vision_model.post_layernorm.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.0.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.1.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.2.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.3.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.4.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.5.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.6.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.7.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.8.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.9.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.10.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.11.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.12.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.13.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.14.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.15.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.16.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.input_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.layers.17.post_attention_layernorm.dense.weight", "model.paligemma_with_expert.gemma_expert.model.norm.dense.bias", "model.paligemma_with_expert.gemma_expert.model.norm.dense.weight". /venv/sigma_vla/lib/python3.10/site-packages/torch/nn/modules/transformer.py:382: UserWarning: enable_nested_tensor is True, but self.use_nested_tensor is False because encoder_layer.norm_first was True warnings.warn( [CHECK-A] disable_telepathy=True [CHECK-A] telepathy_heads_path=/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_lora_out/sigma_telepathy_heads.pt size=561.95MB [CHECK-A] heads_tensors=325 mean=0.002335 std=0.106945 rms=0.106970 [CHECK-A] heads fully matched (no missing/unexpected). [INFO] Found 3 shard files. Example: ['/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00000.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00001.pt', '/workspace/.hf_sigma_cache/Veltraxor__Sigma/storage/sigma_pickplace/shard_00002.pt'] [CHECK-B] telepathy_effect_mean_abs_diff(action_vector)=0.000000 batch=0 mse_vec=120.3846 mse_chk=329.0237 mse_trj=273.8578 tau_l2=51.5920 sem_align=-0.0300 batch=20 mse_vec=118.0605 mse_chk=199.8563 mse_trj=170.4615 tau_l2=51.5977 sem_align=-0.0300 batch=40 mse_vec=104.9334 mse_chk=267.9075 mse_trj=232.3708 tau_l2=51.5962 sem_align=-0.0300 batch=60 mse_vec=88.2953 mse_chk=240.8833 mse_trj=204.7726 tau_l2=51.5980 sem_align=-0.0300 batch=80 mse_vec=111.9474 mse_chk=184.6583 mse_trj=160.4074 tau_l2=51.5983 sem_align=-0.0300 batch=100 mse_vec=92.7795 mse_chk=237.7554 mse_trj=207.4145 tau_l2=51.5985 sem_align=-0.0300 batch=120 mse_vec=124.7822 mse_chk=337.7816 mse_trj=274.7615 tau_l2=51.5930 sem_align=-0.0300 batch=140 mse_vec=166.1161 mse_chk=275.2064 mse_trj=227.3151 tau_l2=51.5894 sem_align=-0.0300 batch=160 mse_vec=130.3206 mse_chk=330.5715 mse_trj=277.0004 tau_l2=51.5921 sem_align=-0.0300 batch=180 mse_vec=64.9395 mse_chk=188.2773 mse_trj=165.8396 tau_l2=51.6013 sem_align=-0.0300 [DONE] Saved report: {'num_samples': 723, 'num_batches': 181, 'avg_mse_vector': 98.83116331153153, 'avg_mse_chunk': 228.97256115655213, 'avg_mse_traj': 191.03286526084605, 'avg_tau_l2': 51.598475039993204, 'avg_semantic_text_alignment': -0.02995842005748775, 'hard_thresholds': {'vec': 0.1, 'chk': 0.2, 'trj': 0.2}, 'avg_hard_mse_vector': 98.87803927430804, 'avg_hard_mse_chunk': 229.0288477643067, 'avg_hard_mse_traj': 191.0677104438159, 'hard_sample_fraction': 1.0, 'total_hard_samples': 723} ===== END CONTROL SEED 7 =====