diff --git "a/artifacts/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/summary.json" "b/artifacts/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/summary.json" new file mode 100644--- /dev/null +++ "b/artifacts/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/summary.json" @@ -0,0 +1,1361 @@ +{ + "experiment_name": "proxy_adapter_wrapped_clip_base_reuse128_seed17", + "device": "cuda", + "best_checkpoint": "/workspace/workspace/outputs/adapter_proxy/proxy_adapter_wrapped_clip_base_reuse128_seed17/checkpoint_best.pt", + "final_train_total": 2.383075835324135, + "final_val_total": 2.3298022985458373, + "train_time_sec": 139.2956690788269, + "peak_gpu_memory_mb": 1625.2607421875, + "num_train_samples": 950, + "num_val_samples": 240, + "planner_mode": false, + "frozen_modules": [], + "trainable_parameter_prefixes": [ + "adapter.state_head", + "adapter.proposal_prior", + "adapter.transition_model", + "adapter.planner" + ], + "trainable_parameter_names": [ + "adapter.state_head.interaction_queries", + "adapter.state_head.interaction_attention.in_proj_weight", + "adapter.state_head.interaction_attention.in_proj_bias", + "adapter.state_head.interaction_attention.out_proj.weight", + "adapter.state_head.interaction_attention.out_proj.bias", + "adapter.state_head.interaction_mlp.0.weight", + "adapter.state_head.interaction_mlp.0.bias", + "adapter.state_head.interaction_mlp.1.weight", + "adapter.state_head.interaction_mlp.1.bias", + "adapter.state_head.interaction_mlp.3.weight", + "adapter.state_head.interaction_mlp.3.bias", + "adapter.state_head.decoder.field_queries", + "adapter.state_head.decoder.field_attention.in_proj_weight", + "adapter.state_head.decoder.field_attention.in_proj_bias", + "adapter.state_head.decoder.field_attention.out_proj.weight", + "adapter.state_head.decoder.field_attention.out_proj.bias", + "adapter.state_head.decoder.field_mlp.0.weight", + "adapter.state_head.decoder.field_mlp.0.bias", + "adapter.state_head.decoder.field_mlp.1.weight", + "adapter.state_head.decoder.field_mlp.1.bias", + "adapter.state_head.decoder.field_mlp.3.weight", + "adapter.state_head.decoder.field_mlp.3.bias", + "adapter.state_head.decoder.summary_proj.0.weight", + "adapter.state_head.decoder.summary_proj.0.bias", + "adapter.state_head.decoder.summary_proj.1.weight", + "adapter.state_head.decoder.summary_proj.1.bias", + "adapter.state_head.decoder.phase_head.0.weight", + "adapter.state_head.decoder.phase_head.0.bias", + "adapter.state_head.decoder.phase_head.1.weight", + "adapter.state_head.decoder.phase_head.1.bias", + "adapter.state_head.decoder.phase_head.3.weight", + "adapter.state_head.decoder.phase_head.3.bias", + "adapter.state_head.decoder.arm_role_head.0.weight", + "adapter.state_head.decoder.arm_role_head.0.bias", + "adapter.state_head.decoder.arm_role_head.1.weight", + "adapter.state_head.decoder.arm_role_head.1.bias", + "adapter.state_head.decoder.arm_role_head.3.weight", + "adapter.state_head.decoder.arm_role_head.3.bias", + "adapter.state_head.decoder.arm_identity.weight", + "adapter.state_head.decoder.support_mode.0.weight", + "adapter.state_head.decoder.support_mode.0.bias", + "adapter.state_head.decoder.support_mode.1.weight", + "adapter.state_head.decoder.support_mode.1.bias", + "adapter.state_head.decoder.support_mode.3.weight", + "adapter.state_head.decoder.support_mode.3.bias", + "adapter.state_head.decoder.access_field.weight", + "adapter.state_head.decoder.access_field.bias", + "adapter.state_head.decoder.target_belief_field.weight", + "adapter.state_head.decoder.target_belief_field.bias", + "adapter.state_head.decoder.visibility_field.weight", + "adapter.state_head.decoder.visibility_field.bias", + "adapter.state_head.decoder.clearance_field.weight", + "adapter.state_head.decoder.clearance_field.bias", + "adapter.state_head.decoder.occluder_contact_field.weight", + "adapter.state_head.decoder.occluder_contact_field.bias", + "adapter.state_head.decoder.grasp_affordance_field.weight", + "adapter.state_head.decoder.grasp_affordance_field.bias", + "adapter.state_head.decoder.support_stability_field.weight", + "adapter.state_head.decoder.support_stability_field.bias", + "adapter.state_head.decoder.persistence_field.weight", + "adapter.state_head.decoder.persistence_field.bias", + "adapter.state_head.decoder.reocclusion_field.weight", + "adapter.state_head.decoder.reocclusion_field.bias", + "adapter.state_head.decoder.disturbance_field.weight", + "adapter.state_head.decoder.disturbance_field.bias", + "adapter.state_head.decoder.uncertainty_field.weight", + "adapter.state_head.decoder.uncertainty_field.bias", + "adapter.state_head.decoder.reocclusion_head.0.weight", + "adapter.state_head.decoder.reocclusion_head.0.bias", + "adapter.state_head.decoder.reocclusion_head.1.weight", + "adapter.state_head.decoder.reocclusion_head.1.bias", + "adapter.state_head.decoder.reocclusion_head.3.weight", + "adapter.state_head.decoder.reocclusion_head.3.bias", + "adapter.state_head.decoder.task_embedding.weight", + "adapter.state_head.decoder.task_field_affine.weight", + "adapter.state_head.decoder.task_field_affine.bias", + "adapter.state_head.decoder.task_summary_adapter.0.weight", + "adapter.state_head.decoder.task_summary_adapter.0.bias", + "adapter.state_head.decoder.task_summary_adapter.1.weight", + "adapter.state_head.decoder.task_summary_adapter.1.bias", + "adapter.state_head.decoder.task_phase_head.weight", + "adapter.state_head.decoder.task_phase_head.bias", + "adapter.state_head.decoder.task_support_head.weight", + "adapter.state_head.decoder.task_support_head.bias", + "adapter.state_head.decoder.task_reocclusion_head.weight", + "adapter.state_head.decoder.task_reocclusion_head.bias", + "adapter.state_head.decoder.task_metric_head.0.weight", + "adapter.state_head.decoder.task_metric_head.0.bias", + "adapter.state_head.decoder.task_metric_head.1.weight", + "adapter.state_head.decoder.task_metric_head.1.bias", + "adapter.state_head.decoder.task_metric_head.3.weight", + "adapter.state_head.decoder.task_metric_head.3.bias", + "adapter.proposal_prior.task_embedding.weight", + "adapter.proposal_prior.context_proj.0.weight", + "adapter.proposal_prior.context_proj.0.bias", + "adapter.proposal_prior.context_proj.1.weight", + "adapter.proposal_prior.context_proj.1.bias", + "adapter.proposal_prior.mode_score_head.weight", + "adapter.proposal_prior.mode_score_head.bias", + "adapter.proposal_prior.mode_residual_heads.0.0.weight", + "adapter.proposal_prior.mode_residual_heads.0.0.bias", + "adapter.proposal_prior.mode_residual_heads.0.1.weight", + "adapter.proposal_prior.mode_residual_heads.0.1.bias", + "adapter.proposal_prior.mode_residual_heads.0.3.weight", + "adapter.proposal_prior.mode_residual_heads.0.3.bias", + "adapter.proposal_prior.mode_residual_heads.1.0.weight", + "adapter.proposal_prior.mode_residual_heads.1.0.bias", + "adapter.proposal_prior.mode_residual_heads.1.1.weight", + "adapter.proposal_prior.mode_residual_heads.1.1.bias", + "adapter.proposal_prior.mode_residual_heads.1.3.weight", + "adapter.proposal_prior.mode_residual_heads.1.3.bias", + "adapter.proposal_prior.mode_residual_heads.2.0.weight", + "adapter.proposal_prior.mode_residual_heads.2.0.bias", + "adapter.proposal_prior.mode_residual_heads.2.1.weight", + "adapter.proposal_prior.mode_residual_heads.2.1.bias", + "adapter.proposal_prior.mode_residual_heads.2.3.weight", + "adapter.proposal_prior.mode_residual_heads.2.3.bias", + "adapter.proposal_prior.mode_residual_heads.3.0.weight", + "adapter.proposal_prior.mode_residual_heads.3.0.bias", + "adapter.proposal_prior.mode_residual_heads.3.1.weight", + "adapter.proposal_prior.mode_residual_heads.3.1.bias", + "adapter.proposal_prior.mode_residual_heads.3.3.weight", + "adapter.proposal_prior.mode_residual_heads.3.3.bias", + "adapter.proposal_prior.mode_residual_heads.4.0.weight", + "adapter.proposal_prior.mode_residual_heads.4.0.bias", + "adapter.proposal_prior.mode_residual_heads.4.1.weight", + "adapter.proposal_prior.mode_residual_heads.4.1.bias", + "adapter.proposal_prior.mode_residual_heads.4.3.weight", + "adapter.proposal_prior.mode_residual_heads.4.3.bias", + "adapter.proposal_prior.mode_residual_heads.5.0.weight", + "adapter.proposal_prior.mode_residual_heads.5.0.bias", + "adapter.proposal_prior.mode_residual_heads.5.1.weight", + "adapter.proposal_prior.mode_residual_heads.5.1.bias", + "adapter.proposal_prior.mode_residual_heads.5.3.weight", + "adapter.proposal_prior.mode_residual_heads.5.3.bias", + "adapter.proposal_prior.mode_residual_heads.6.0.weight", + "adapter.proposal_prior.mode_residual_heads.6.0.bias", + "adapter.proposal_prior.mode_residual_heads.6.1.weight", + "adapter.proposal_prior.mode_residual_heads.6.1.bias", + "adapter.proposal_prior.mode_residual_heads.6.3.weight", + "adapter.proposal_prior.mode_residual_heads.6.3.bias", + "adapter.proposal_prior.slot_embedding.weight", + "adapter.proposal_prior.slot_delta.0.weight", + "adapter.proposal_prior.slot_delta.0.bias", + "adapter.proposal_prior.slot_delta.1.weight", + "adapter.proposal_prior.slot_delta.1.bias", + "adapter.proposal_prior.slot_delta.3.weight", + "adapter.proposal_prior.slot_delta.3.bias", + "adapter.transition_model.state_encoder.0.weight", + "adapter.transition_model.state_encoder.0.bias", + "adapter.transition_model.state_encoder.1.weight", + "adapter.transition_model.state_encoder.1.bias", + "adapter.transition_model.action_encoder.0.weight", + "adapter.transition_model.action_encoder.0.bias", + "adapter.transition_model.action_encoder.1.weight", + "adapter.transition_model.action_encoder.1.bias", + "adapter.transition_model.mode_embedding.weight", + "adapter.transition_model.transition.weight_ih_l0", + "adapter.transition_model.transition.weight_hh_l0", + "adapter.transition_model.transition.bias_ih_l0", + "adapter.transition_model.transition.bias_hh_l0", + "adapter.transition_model.summary_decoder.weight", + "adapter.transition_model.summary_decoder.bias", + "adapter.transition_model.access_field_head.weight", + "adapter.transition_model.access_field_head.bias", + "adapter.transition_model.support_field_head.weight", + "adapter.transition_model.support_field_head.bias", + "adapter.planner.reranker.network.0.weight", + "adapter.planner.reranker.network.0.bias", + "adapter.planner.reranker.network.1.weight", + "adapter.planner.reranker.network.1.bias", + "adapter.planner.reranker.network.3.weight", + "adapter.planner.reranker.network.3.bias", + "adapter.planner.reranker.score_head.weight", + "adapter.planner.reranker.score_head.bias", + "adapter.planner.reranker.success_head.weight", + "adapter.planner.reranker.success_head.bias", + "adapter.planner.reranker.risk_head.weight", + "adapter.planner.reranker.risk_head.bias" + ], + "init_info": { + "path": "/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt", + "loaded_keys": 601, + "skipped_shape_mismatch_keys": [ + "memory.scene_memory.position_embedding", + "memory.scene_memory.bank_queries", + "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_weight", + "memory.scene_memory.sequence_encoder.layers.0.self_attn.in_proj_bias", + "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.weight", + "memory.scene_memory.sequence_encoder.layers.0.self_attn.out_proj.bias", + "memory.scene_memory.sequence_encoder.layers.0.linear1.weight", + "memory.scene_memory.sequence_encoder.layers.0.linear1.bias", + "memory.scene_memory.sequence_encoder.layers.0.linear2.weight", + "memory.scene_memory.sequence_encoder.layers.0.linear2.bias", + "memory.scene_memory.sequence_encoder.layers.0.norm1.weight", + "memory.scene_memory.sequence_encoder.layers.0.norm1.bias", + "memory.scene_memory.sequence_encoder.layers.0.norm2.weight", + "memory.scene_memory.sequence_encoder.layers.0.norm2.bias", + "memory.scene_memory.bank_attention.in_proj_weight", + "memory.scene_memory.bank_attention.in_proj_bias", + "memory.scene_memory.bank_attention.out_proj.weight", + "memory.scene_memory.bank_attention.out_proj.bias", + "memory.scene_memory.action_proj.0.weight", + "memory.scene_memory.action_proj.0.bias", + "memory.scene_memory.action_proj.1.weight", + "memory.scene_memory.action_proj.1.bias", + "memory.scene_memory.write_gate.0.weight", + "memory.scene_memory.write_gate.0.bias", + "memory.scene_memory.write_gate.1.weight", + "memory.scene_memory.write_gate.1.bias", + "memory.scene_memory.write_gate.3.weight", + "memory.scene_memory.write_gate.3.bias", + "memory.scene_memory.token_proj.0.weight", + "memory.scene_memory.token_proj.0.bias", + "memory.scene_memory.token_proj.1.weight", + "memory.scene_memory.token_proj.1.bias", + "memory.belief_memory.position_embedding", + "memory.belief_memory.bank_queries", + "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_weight", + "memory.belief_memory.sequence_encoder.layers.0.self_attn.in_proj_bias", + "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.weight", + "memory.belief_memory.sequence_encoder.layers.0.self_attn.out_proj.bias", + "memory.belief_memory.sequence_encoder.layers.0.linear1.weight", + "memory.belief_memory.sequence_encoder.layers.0.linear1.bias", + "memory.belief_memory.sequence_encoder.layers.0.linear2.weight", + "memory.belief_memory.sequence_encoder.layers.0.linear2.bias", + "memory.belief_memory.sequence_encoder.layers.0.norm1.weight", + "memory.belief_memory.sequence_encoder.layers.0.norm1.bias", + "memory.belief_memory.sequence_encoder.layers.0.norm2.weight", + "memory.belief_memory.sequence_encoder.layers.0.norm2.bias", + "memory.belief_memory.bank_attention.in_proj_weight", + "memory.belief_memory.bank_attention.in_proj_bias", + "memory.belief_memory.bank_attention.out_proj.weight", + "memory.belief_memory.bank_attention.out_proj.bias", + "memory.belief_memory.action_proj.0.weight", + "memory.belief_memory.action_proj.0.bias", + "memory.belief_memory.action_proj.1.weight", + "memory.belief_memory.action_proj.1.bias", + "memory.belief_memory.write_gate.0.weight", + "memory.belief_memory.write_gate.0.bias", + "memory.belief_memory.write_gate.1.weight", + "memory.belief_memory.write_gate.1.bias", + "memory.belief_memory.write_gate.3.weight", + "memory.belief_memory.write_gate.3.bias", + "memory.belief_memory.token_proj.0.weight", + "memory.belief_memory.token_proj.0.bias", + "memory.belief_memory.token_proj.1.weight", + "memory.belief_memory.token_proj.1.bias", + "decoder.arm_decoder.layers.0.self_attn.in_proj_weight", + "decoder.arm_decoder.layers.0.self_attn.in_proj_bias", + "decoder.arm_decoder.layers.0.self_attn.out_proj.weight", + "decoder.arm_decoder.layers.0.self_attn.out_proj.bias", + "decoder.arm_decoder.layers.0.multihead_attn.in_proj_weight", + "decoder.arm_decoder.layers.0.multihead_attn.in_proj_bias", + "decoder.arm_decoder.layers.0.multihead_attn.out_proj.weight", + "decoder.arm_decoder.layers.0.multihead_attn.out_proj.bias", + "decoder.arm_decoder.layers.0.linear1.weight", + "decoder.arm_decoder.layers.0.linear1.bias", + "decoder.arm_decoder.layers.0.linear2.weight", + "decoder.arm_decoder.layers.0.linear2.bias", + "decoder.arm_decoder.layers.0.norm1.weight", + "decoder.arm_decoder.layers.0.norm1.bias", + "decoder.arm_decoder.layers.0.norm2.weight", + "decoder.arm_decoder.layers.0.norm2.bias", + "decoder.arm_decoder.layers.0.norm3.weight", + "decoder.arm_decoder.layers.0.norm3.bias", + "decoder.arm_decoder.layers.1.self_attn.in_proj_weight", + "decoder.arm_decoder.layers.1.self_attn.in_proj_bias", + "decoder.arm_decoder.layers.1.self_attn.out_proj.weight", + "decoder.arm_decoder.layers.1.self_attn.out_proj.bias", + "decoder.arm_decoder.layers.1.multihead_attn.in_proj_weight", + "decoder.arm_decoder.layers.1.multihead_attn.in_proj_bias", + "decoder.arm_decoder.layers.1.multihead_attn.out_proj.weight", + "decoder.arm_decoder.layers.1.multihead_attn.out_proj.bias", + "decoder.arm_decoder.layers.1.linear1.weight", + "decoder.arm_decoder.layers.1.linear1.bias", + "decoder.arm_decoder.layers.1.linear2.weight", + "decoder.arm_decoder.layers.1.linear2.bias", + "decoder.arm_decoder.layers.1.norm1.weight", + "decoder.arm_decoder.layers.1.norm1.bias", + "decoder.arm_decoder.layers.1.norm2.weight", + "decoder.arm_decoder.layers.1.norm2.bias", + "decoder.arm_decoder.layers.1.norm3.weight", + "decoder.arm_decoder.layers.1.norm3.bias", + "decoder.arm_decoder.layers.2.self_attn.in_proj_weight", + "decoder.arm_decoder.layers.2.self_attn.in_proj_bias", + "decoder.arm_decoder.layers.2.self_attn.out_proj.weight", + "decoder.arm_decoder.layers.2.self_attn.out_proj.bias", + "decoder.arm_decoder.layers.2.multihead_attn.in_proj_weight", + "decoder.arm_decoder.layers.2.multihead_attn.in_proj_bias", + "decoder.arm_decoder.layers.2.multihead_attn.out_proj.weight", + "decoder.arm_decoder.layers.2.multihead_attn.out_proj.bias", + "decoder.arm_decoder.layers.2.linear1.weight", + "decoder.arm_decoder.layers.2.linear1.bias", + "decoder.arm_decoder.layers.2.linear2.weight", + "decoder.arm_decoder.layers.2.linear2.bias", + "decoder.arm_decoder.layers.2.norm1.weight", + "decoder.arm_decoder.layers.2.norm1.bias", + "decoder.arm_decoder.layers.2.norm2.weight", + "decoder.arm_decoder.layers.2.norm2.bias", + "decoder.arm_decoder.layers.2.norm3.weight", + "decoder.arm_decoder.layers.2.norm3.bias", + "decoder.arm_decoder.layers.3.self_attn.in_proj_weight", + "decoder.arm_decoder.layers.3.self_attn.in_proj_bias", + "decoder.arm_decoder.layers.3.self_attn.out_proj.weight", + "decoder.arm_decoder.layers.3.self_attn.out_proj.bias", + "decoder.arm_decoder.layers.3.multihead_attn.in_proj_weight", + "decoder.arm_decoder.layers.3.multihead_attn.in_proj_bias", + "decoder.arm_decoder.layers.3.multihead_attn.out_proj.weight", + "decoder.arm_decoder.layers.3.multihead_attn.out_proj.bias", + "decoder.arm_decoder.layers.3.linear1.weight", + "decoder.arm_decoder.layers.3.linear1.bias", + "decoder.arm_decoder.layers.3.linear2.weight", + "decoder.arm_decoder.layers.3.linear2.bias", + "decoder.arm_decoder.layers.3.norm1.weight", + "decoder.arm_decoder.layers.3.norm1.bias", + "decoder.arm_decoder.layers.3.norm2.weight", + "decoder.arm_decoder.layers.3.norm2.bias", + "decoder.arm_decoder.layers.3.norm3.weight", + "decoder.arm_decoder.layers.3.norm3.bias", + "decoder.arm_identity.weight", + "decoder.task_embedding.weight", + "decoder.phase_adapter.weight", + "decoder.phase_adapter.bias", + "decoder.role_adapter.weight", + "decoder.role_adapter.bias", + "decoder.context_proj.0.weight", + "decoder.context_proj.0.bias", + "decoder.context_proj.1.weight", + "decoder.context_proj.1.bias", + "decoder.arm_head.0.weight", + "decoder.arm_head.0.bias", + "decoder.arm_head.1.weight", + "decoder.arm_head.1.bias", + "decoder.arm_mean.weight", + "decoder.arm_mean.bias", + "decoder.arm_log_std.weight", + "decoder.arm_log_std.bias", + "decoder.proposal_mode_head.0.weight", + "decoder.proposal_mode_head.0.bias", + "decoder.proposal_mode_head.1.weight", + "decoder.proposal_mode_head.1.bias", + "decoder.proposal_mode_head.3.weight", + "decoder.proposal_mode_head.3.bias", + "decoder.proposal_mode_embeddings.weight", + "decoder.proposal_slot_embeddings.weight", + "decoder.mode_residual_heads.0.0.weight", + "decoder.mode_residual_heads.0.0.bias", + "decoder.mode_residual_heads.0.1.weight", + "decoder.mode_residual_heads.0.1.bias", + "decoder.mode_residual_heads.0.3.weight", + "decoder.mode_residual_heads.0.3.bias", + "decoder.mode_residual_heads.1.0.weight", + "decoder.mode_residual_heads.1.0.bias", + "decoder.mode_residual_heads.1.1.weight", + "decoder.mode_residual_heads.1.1.bias", + "decoder.mode_residual_heads.1.3.weight", + "decoder.mode_residual_heads.1.3.bias", + "decoder.mode_residual_heads.2.0.weight", + "decoder.mode_residual_heads.2.0.bias", + "decoder.mode_residual_heads.2.1.weight", + "decoder.mode_residual_heads.2.1.bias", + "decoder.mode_residual_heads.2.3.weight", + "decoder.mode_residual_heads.2.3.bias", + "decoder.mode_residual_heads.3.0.weight", + "decoder.mode_residual_heads.3.0.bias", + "decoder.mode_residual_heads.3.1.weight", + "decoder.mode_residual_heads.3.1.bias", + "decoder.mode_residual_heads.3.3.weight", + "decoder.mode_residual_heads.3.3.bias", + "decoder.mode_residual_heads.4.0.weight", + "decoder.mode_residual_heads.4.0.bias", + "decoder.mode_residual_heads.4.1.weight", + "decoder.mode_residual_heads.4.1.bias", + "decoder.mode_residual_heads.4.3.weight", + "decoder.mode_residual_heads.4.3.bias", + "decoder.mode_residual_heads.5.0.weight", + "decoder.mode_residual_heads.5.0.bias", + "decoder.mode_residual_heads.5.1.weight", + "decoder.mode_residual_heads.5.1.bias", + "decoder.mode_residual_heads.5.3.weight", + "decoder.mode_residual_heads.5.3.bias", + "decoder.mode_residual_heads.6.0.weight", + "decoder.mode_residual_heads.6.0.bias", + "decoder.mode_residual_heads.6.1.weight", + "decoder.mode_residual_heads.6.1.bias", + "decoder.mode_residual_heads.6.3.weight", + "decoder.mode_residual_heads.6.3.bias", + "decoder.slot_delta.0.weight", + "decoder.slot_delta.0.bias", + "decoder.slot_delta.1.weight", + "decoder.slot_delta.1.bias", + "decoder.slot_delta.3.weight", + "decoder.slot_delta.3.bias", + "decoder.proposal_score.0.weight", + "decoder.proposal_score.0.bias", + "decoder.proposal_score.1.weight", + "decoder.proposal_score.1.bias", + "decoder.proposal_score.3.weight", + "decoder.proposal_score.3.bias", + "world_model.scene_memory_proj.0.weight", + "world_model.scene_memory_proj.0.bias", + "world_model.scene_memory_proj.1.weight", + "world_model.scene_memory_proj.1.bias", + "world_model.belief_memory_proj.0.weight", + "world_model.belief_memory_proj.0.bias", + "world_model.belief_memory_proj.1.weight", + "world_model.belief_memory_proj.1.bias", + "world_model.transition.weight_ih", + "world_model.scene_memory_update.weight", + "world_model.scene_memory_update.bias", + "world_model.belief_memory_update.weight", + "world_model.belief_memory_update.bias", + "world_model.compact_decoder.weight", + "world_model.compact_decoder.bias", + "world_model.target_belief_head.weight", + "world_model.target_belief_head.bias", + "world_model.visibility_head.weight", + "world_model.visibility_head.bias", + "world_model.clearance_head.weight", + "world_model.clearance_head.bias", + "world_model.occluder_contact_head.weight", + "world_model.occluder_contact_head.bias", + "world_model.grasp_affordance_head.weight", + "world_model.grasp_affordance_head.bias", + "world_model.support_stability_head.weight", + "world_model.support_stability_head.bias", + "world_model.persistence_head.weight", + "world_model.persistence_head.bias", + "world_model.reocclusion_head.weight", + "world_model.reocclusion_head.bias", + "world_model.disturbance_head.weight", + "world_model.disturbance_head.bias", + "world_model.uncertainty_head.weight", + "world_model.uncertainty_head.bias", + "world_model.access_head.weight", + "world_model.access_head.bias", + "world_model.task_embedding.weight", + "world_model.spatial_field_encoder.0.weight", + "world_model.spatial_field_encoder.0.bias", + "world_model.spatial_field_encoder.2.weight", + "world_model.spatial_field_encoder.2.bias", + "world_model.spatial_context_proj.0.weight", + "world_model.spatial_context_proj.0.bias", + "world_model.spatial_context_proj.1.weight", + "world_model.spatial_context_proj.1.bias", + "world_model.spatial_gate_z.weight", + "world_model.spatial_gate_z.bias", + "world_model.spatial_gate_r.weight", + "world_model.spatial_gate_r.bias", + "world_model.spatial_candidate.weight", + "world_model.spatial_candidate.bias", + "world_model.spatial_summary_proj.0.weight", + "world_model.spatial_summary_proj.0.bias", + "world_model.spatial_summary_proj.1.weight", + "world_model.spatial_summary_proj.1.bias", + "world_model.spatial_phase_head.weight", + "world_model.spatial_phase_head.bias", + "world_model.spatial_support_mode_head.weight", + "world_model.spatial_support_mode_head.bias", + "world_model.spatial_arm_role_head.weight", + "world_model.spatial_arm_role_head.bias", + "world_model.spatial_reocclusion_head.weight", + "world_model.spatial_reocclusion_head.bias", + "world_model.spatial_target_belief_head.weight", + "world_model.spatial_target_belief_head.bias", + "world_model.spatial_visibility_head.weight", + "world_model.spatial_visibility_head.bias", + "world_model.spatial_clearance_head.weight", + "world_model.spatial_clearance_head.bias", + "world_model.spatial_occluder_contact_head.weight", + "world_model.spatial_occluder_contact_head.bias", + "world_model.spatial_grasp_affordance_head.weight", + "world_model.spatial_grasp_affordance_head.bias", + "world_model.spatial_support_stability_head.weight", + "world_model.spatial_support_stability_head.bias", + "world_model.spatial_persistence_head.weight", + "world_model.spatial_persistence_head.bias", + "world_model.spatial_reocclusion_field_head.weight", + "world_model.spatial_reocclusion_field_head.bias", + "world_model.spatial_disturbance_head.weight", + "world_model.spatial_disturbance_head.bias", + "world_model.spatial_uncertainty_head.weight", + "world_model.spatial_uncertainty_head.bias", + "world_model.spatial_access_head.weight", + "world_model.spatial_access_head.bias", + "planner.residual.trunk.0.weight", + "planner.residual.trunk.0.bias", + "planner.residual.trunk.1.weight" + ], + "remapped_keys": { + "backbone.depth_adapter.depth_proj.0.weight": "trunk.backbone.depth_adapter.depth_proj.0.weight", + "backbone.depth_adapter.depth_proj.0.bias": "trunk.backbone.depth_adapter.depth_proj.0.bias", + "backbone.depth_adapter.depth_proj.1.weight": "trunk.backbone.depth_adapter.depth_proj.1.weight", + "backbone.depth_adapter.depth_proj.1.bias": "trunk.backbone.depth_adapter.depth_proj.1.bias", + "backbone.depth_adapter.depth_proj.3.weight": "trunk.backbone.depth_adapter.depth_proj.3.weight", + "backbone.depth_adapter.depth_proj.3.bias": "trunk.backbone.depth_adapter.depth_proj.3.bias", + "backbone.depth_adapter.geometry_proj.0.weight": "trunk.backbone.depth_adapter.geometry_proj.0.weight", + "backbone.depth_adapter.geometry_proj.0.bias": "trunk.backbone.depth_adapter.geometry_proj.0.bias", + "backbone.depth_adapter.geometry_proj.1.weight": "trunk.backbone.depth_adapter.geometry_proj.1.weight", + "backbone.depth_adapter.geometry_proj.1.bias": "trunk.backbone.depth_adapter.geometry_proj.1.bias", + "backbone.depth_adapter.camera_proj.0.weight": "trunk.backbone.depth_adapter.camera_proj.0.weight", + "backbone.depth_adapter.camera_proj.0.bias": "trunk.backbone.depth_adapter.camera_proj.0.bias", + "backbone.depth_adapter.camera_proj.1.weight": "trunk.backbone.depth_adapter.camera_proj.1.weight", + "backbone.depth_adapter.camera_proj.1.bias": "trunk.backbone.depth_adapter.camera_proj.1.bias", + "backbone.vision_model.embeddings.class_embedding": "trunk.backbone.vision_model.embeddings.class_embedding", + "backbone.vision_model.embeddings.patch_embedding.weight": "trunk.backbone.vision_model.embeddings.patch_embedding.weight", + "backbone.vision_model.embeddings.position_embedding.weight": "trunk.backbone.vision_model.embeddings.position_embedding.weight", + "backbone.vision_model.pre_layrnorm.weight": "trunk.backbone.vision_model.pre_layrnorm.weight", + "backbone.vision_model.pre_layrnorm.bias": "trunk.backbone.vision_model.pre_layrnorm.bias", + "backbone.vision_model.encoder.layers.0.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.0.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.0.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.0.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.0.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.0.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.0.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.0.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.0.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.0.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.0.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.0.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.0.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.0.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.0.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.0.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.0.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.0.layer_norm1.weight", + "backbone.vision_model.encoder.layers.0.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.0.layer_norm1.bias", + "backbone.vision_model.encoder.layers.0.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.0.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.0.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.0.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.0.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.0.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.0.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.0.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.0.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.0.layer_norm2.weight", + "backbone.vision_model.encoder.layers.0.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.0.layer_norm2.bias", + "backbone.vision_model.encoder.layers.1.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.1.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.1.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.1.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.1.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.1.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.1.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.1.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.1.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.1.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.1.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.1.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.1.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.1.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.1.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.1.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.1.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.1.layer_norm1.weight", + "backbone.vision_model.encoder.layers.1.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.1.layer_norm1.bias", + "backbone.vision_model.encoder.layers.1.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.1.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.1.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.1.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.1.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.1.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.1.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.1.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.1.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.1.layer_norm2.weight", + "backbone.vision_model.encoder.layers.1.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.1.layer_norm2.bias", + "backbone.vision_model.encoder.layers.2.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.2.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.2.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.2.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.2.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.2.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.2.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.2.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.2.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.2.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.2.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.2.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.2.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.2.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.2.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.2.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.2.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.2.layer_norm1.weight", + "backbone.vision_model.encoder.layers.2.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.2.layer_norm1.bias", + "backbone.vision_model.encoder.layers.2.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.2.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.2.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.2.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.2.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.2.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.2.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.2.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.2.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.2.layer_norm2.weight", + "backbone.vision_model.encoder.layers.2.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.2.layer_norm2.bias", + "backbone.vision_model.encoder.layers.3.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.3.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.3.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.3.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.3.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.3.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.3.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.3.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.3.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.3.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.3.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.3.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.3.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.3.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.3.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.3.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.3.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.3.layer_norm1.weight", + "backbone.vision_model.encoder.layers.3.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.3.layer_norm1.bias", + "backbone.vision_model.encoder.layers.3.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.3.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.3.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.3.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.3.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.3.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.3.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.3.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.3.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.3.layer_norm2.weight", + "backbone.vision_model.encoder.layers.3.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.3.layer_norm2.bias", + "backbone.vision_model.encoder.layers.4.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.4.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.4.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.4.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.4.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.4.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.4.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.4.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.4.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.4.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.4.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.4.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.4.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.4.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.4.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.4.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.4.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.4.layer_norm1.weight", + "backbone.vision_model.encoder.layers.4.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.4.layer_norm1.bias", + "backbone.vision_model.encoder.layers.4.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.4.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.4.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.4.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.4.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.4.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.4.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.4.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.4.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.4.layer_norm2.weight", + "backbone.vision_model.encoder.layers.4.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.4.layer_norm2.bias", + "backbone.vision_model.encoder.layers.5.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.5.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.5.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.5.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.5.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.5.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.5.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.5.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.5.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.5.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.5.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.5.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.5.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.5.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.5.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.5.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.5.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.5.layer_norm1.weight", + "backbone.vision_model.encoder.layers.5.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.5.layer_norm1.bias", + "backbone.vision_model.encoder.layers.5.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.5.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.5.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.5.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.5.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.5.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.5.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.5.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.5.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.5.layer_norm2.weight", + "backbone.vision_model.encoder.layers.5.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.5.layer_norm2.bias", + "backbone.vision_model.encoder.layers.6.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.6.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.6.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.6.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.6.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.6.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.6.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.6.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.6.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.6.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.6.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.6.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.6.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.6.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.6.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.6.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.6.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.6.layer_norm1.weight", + "backbone.vision_model.encoder.layers.6.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.6.layer_norm1.bias", + "backbone.vision_model.encoder.layers.6.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.6.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.6.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.6.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.6.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.6.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.6.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.6.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.6.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.6.layer_norm2.weight", + "backbone.vision_model.encoder.layers.6.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.6.layer_norm2.bias", + "backbone.vision_model.encoder.layers.7.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.7.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.7.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.7.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.7.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.7.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.7.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.7.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.7.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.7.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.7.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.7.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.7.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.7.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.7.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.7.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.7.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.7.layer_norm1.weight", + "backbone.vision_model.encoder.layers.7.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.7.layer_norm1.bias", + "backbone.vision_model.encoder.layers.7.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.7.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.7.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.7.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.7.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.7.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.7.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.7.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.7.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.7.layer_norm2.weight", + "backbone.vision_model.encoder.layers.7.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.7.layer_norm2.bias", + "backbone.vision_model.encoder.layers.8.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.8.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.8.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.8.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.8.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.8.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.8.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.8.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.8.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.8.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.8.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.8.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.8.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.8.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.8.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.8.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.8.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.8.layer_norm1.weight", + "backbone.vision_model.encoder.layers.8.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.8.layer_norm1.bias", + "backbone.vision_model.encoder.layers.8.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.8.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.8.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.8.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.8.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.8.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.8.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.8.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.8.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.8.layer_norm2.weight", + "backbone.vision_model.encoder.layers.8.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.8.layer_norm2.bias", + "backbone.vision_model.encoder.layers.9.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.9.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.9.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.9.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.9.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.9.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.9.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.9.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.9.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.9.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.9.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.9.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.9.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.9.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.9.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.9.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.9.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.9.layer_norm1.weight", + "backbone.vision_model.encoder.layers.9.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.9.layer_norm1.bias", + "backbone.vision_model.encoder.layers.9.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.9.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.9.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.9.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.9.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.9.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.9.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.9.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.9.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.9.layer_norm2.weight", + "backbone.vision_model.encoder.layers.9.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.9.layer_norm2.bias", + "backbone.vision_model.encoder.layers.10.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.10.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.10.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.10.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.10.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.10.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.10.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.10.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.10.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.10.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.10.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.10.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.10.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.10.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.10.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.10.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.10.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.10.layer_norm1.weight", + "backbone.vision_model.encoder.layers.10.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.10.layer_norm1.bias", + "backbone.vision_model.encoder.layers.10.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.10.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.10.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.10.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.10.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.10.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.10.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.10.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.10.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.10.layer_norm2.weight", + "backbone.vision_model.encoder.layers.10.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.10.layer_norm2.bias", + "backbone.vision_model.encoder.layers.11.self_attn.k_proj.weight": "trunk.backbone.vision_model.encoder.layers.11.self_attn.k_proj.weight", + "backbone.vision_model.encoder.layers.11.self_attn.k_proj.bias": "trunk.backbone.vision_model.encoder.layers.11.self_attn.k_proj.bias", + "backbone.vision_model.encoder.layers.11.self_attn.v_proj.weight": "trunk.backbone.vision_model.encoder.layers.11.self_attn.v_proj.weight", + "backbone.vision_model.encoder.layers.11.self_attn.v_proj.bias": "trunk.backbone.vision_model.encoder.layers.11.self_attn.v_proj.bias", + "backbone.vision_model.encoder.layers.11.self_attn.q_proj.weight": "trunk.backbone.vision_model.encoder.layers.11.self_attn.q_proj.weight", + "backbone.vision_model.encoder.layers.11.self_attn.q_proj.bias": "trunk.backbone.vision_model.encoder.layers.11.self_attn.q_proj.bias", + "backbone.vision_model.encoder.layers.11.self_attn.out_proj.weight": "trunk.backbone.vision_model.encoder.layers.11.self_attn.out_proj.weight", + "backbone.vision_model.encoder.layers.11.self_attn.out_proj.bias": "trunk.backbone.vision_model.encoder.layers.11.self_attn.out_proj.bias", + "backbone.vision_model.encoder.layers.11.layer_norm1.weight": "trunk.backbone.vision_model.encoder.layers.11.layer_norm1.weight", + "backbone.vision_model.encoder.layers.11.layer_norm1.bias": "trunk.backbone.vision_model.encoder.layers.11.layer_norm1.bias", + "backbone.vision_model.encoder.layers.11.mlp.fc1.weight": "trunk.backbone.vision_model.encoder.layers.11.mlp.fc1.weight", + "backbone.vision_model.encoder.layers.11.mlp.fc1.bias": "trunk.backbone.vision_model.encoder.layers.11.mlp.fc1.bias", + "backbone.vision_model.encoder.layers.11.mlp.fc2.weight": "trunk.backbone.vision_model.encoder.layers.11.mlp.fc2.weight", + "backbone.vision_model.encoder.layers.11.mlp.fc2.bias": "trunk.backbone.vision_model.encoder.layers.11.mlp.fc2.bias", + "backbone.vision_model.encoder.layers.11.layer_norm2.weight": "trunk.backbone.vision_model.encoder.layers.11.layer_norm2.weight", + "backbone.vision_model.encoder.layers.11.layer_norm2.bias": "trunk.backbone.vision_model.encoder.layers.11.layer_norm2.bias", + "backbone.vision_model.post_layernorm.weight": "trunk.backbone.vision_model.post_layernorm.weight", + "backbone.vision_model.post_layernorm.bias": "trunk.backbone.vision_model.post_layernorm.bias", + "backbone.text_model.embeddings.token_embedding.weight": "trunk.backbone.text_model.embeddings.token_embedding.weight", + "backbone.text_model.embeddings.position_embedding.weight": "trunk.backbone.text_model.embeddings.position_embedding.weight", + "backbone.text_model.encoder.layers.0.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.0.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.0.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.0.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.0.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.0.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.0.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.0.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.0.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.0.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.0.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.0.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.0.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.0.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.0.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.0.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.0.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.0.layer_norm1.weight", + "backbone.text_model.encoder.layers.0.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.0.layer_norm1.bias", + "backbone.text_model.encoder.layers.0.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.0.mlp.fc1.weight", + "backbone.text_model.encoder.layers.0.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.0.mlp.fc1.bias", + "backbone.text_model.encoder.layers.0.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.0.mlp.fc2.weight", + "backbone.text_model.encoder.layers.0.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.0.mlp.fc2.bias", + "backbone.text_model.encoder.layers.0.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.0.layer_norm2.weight", + "backbone.text_model.encoder.layers.0.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.0.layer_norm2.bias", + "backbone.text_model.encoder.layers.1.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.1.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.1.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.1.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.1.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.1.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.1.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.1.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.1.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.1.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.1.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.1.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.1.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.1.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.1.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.1.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.1.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.1.layer_norm1.weight", + "backbone.text_model.encoder.layers.1.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.1.layer_norm1.bias", + "backbone.text_model.encoder.layers.1.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.1.mlp.fc1.weight", + "backbone.text_model.encoder.layers.1.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.1.mlp.fc1.bias", + "backbone.text_model.encoder.layers.1.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.1.mlp.fc2.weight", + "backbone.text_model.encoder.layers.1.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.1.mlp.fc2.bias", + "backbone.text_model.encoder.layers.1.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.1.layer_norm2.weight", + "backbone.text_model.encoder.layers.1.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.1.layer_norm2.bias", + "backbone.text_model.encoder.layers.2.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.2.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.2.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.2.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.2.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.2.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.2.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.2.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.2.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.2.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.2.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.2.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.2.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.2.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.2.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.2.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.2.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.2.layer_norm1.weight", + "backbone.text_model.encoder.layers.2.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.2.layer_norm1.bias", + "backbone.text_model.encoder.layers.2.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.2.mlp.fc1.weight", + "backbone.text_model.encoder.layers.2.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.2.mlp.fc1.bias", + "backbone.text_model.encoder.layers.2.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.2.mlp.fc2.weight", + "backbone.text_model.encoder.layers.2.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.2.mlp.fc2.bias", + "backbone.text_model.encoder.layers.2.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.2.layer_norm2.weight", + "backbone.text_model.encoder.layers.2.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.2.layer_norm2.bias", + "backbone.text_model.encoder.layers.3.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.3.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.3.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.3.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.3.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.3.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.3.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.3.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.3.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.3.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.3.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.3.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.3.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.3.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.3.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.3.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.3.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.3.layer_norm1.weight", + "backbone.text_model.encoder.layers.3.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.3.layer_norm1.bias", + "backbone.text_model.encoder.layers.3.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.3.mlp.fc1.weight", + "backbone.text_model.encoder.layers.3.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.3.mlp.fc1.bias", + "backbone.text_model.encoder.layers.3.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.3.mlp.fc2.weight", + "backbone.text_model.encoder.layers.3.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.3.mlp.fc2.bias", + "backbone.text_model.encoder.layers.3.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.3.layer_norm2.weight", + "backbone.text_model.encoder.layers.3.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.3.layer_norm2.bias", + "backbone.text_model.encoder.layers.4.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.4.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.4.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.4.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.4.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.4.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.4.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.4.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.4.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.4.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.4.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.4.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.4.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.4.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.4.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.4.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.4.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.4.layer_norm1.weight", + "backbone.text_model.encoder.layers.4.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.4.layer_norm1.bias", + "backbone.text_model.encoder.layers.4.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.4.mlp.fc1.weight", + "backbone.text_model.encoder.layers.4.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.4.mlp.fc1.bias", + "backbone.text_model.encoder.layers.4.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.4.mlp.fc2.weight", + "backbone.text_model.encoder.layers.4.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.4.mlp.fc2.bias", + "backbone.text_model.encoder.layers.4.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.4.layer_norm2.weight", + "backbone.text_model.encoder.layers.4.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.4.layer_norm2.bias", + "backbone.text_model.encoder.layers.5.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.5.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.5.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.5.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.5.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.5.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.5.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.5.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.5.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.5.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.5.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.5.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.5.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.5.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.5.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.5.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.5.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.5.layer_norm1.weight", + "backbone.text_model.encoder.layers.5.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.5.layer_norm1.bias", + "backbone.text_model.encoder.layers.5.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.5.mlp.fc1.weight", + "backbone.text_model.encoder.layers.5.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.5.mlp.fc1.bias", + "backbone.text_model.encoder.layers.5.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.5.mlp.fc2.weight", + "backbone.text_model.encoder.layers.5.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.5.mlp.fc2.bias", + "backbone.text_model.encoder.layers.5.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.5.layer_norm2.weight", + "backbone.text_model.encoder.layers.5.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.5.layer_norm2.bias", + "backbone.text_model.encoder.layers.6.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.6.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.6.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.6.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.6.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.6.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.6.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.6.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.6.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.6.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.6.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.6.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.6.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.6.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.6.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.6.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.6.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.6.layer_norm1.weight", + "backbone.text_model.encoder.layers.6.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.6.layer_norm1.bias", + "backbone.text_model.encoder.layers.6.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.6.mlp.fc1.weight", + "backbone.text_model.encoder.layers.6.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.6.mlp.fc1.bias", + "backbone.text_model.encoder.layers.6.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.6.mlp.fc2.weight", + "backbone.text_model.encoder.layers.6.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.6.mlp.fc2.bias", + "backbone.text_model.encoder.layers.6.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.6.layer_norm2.weight", + "backbone.text_model.encoder.layers.6.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.6.layer_norm2.bias", + "backbone.text_model.encoder.layers.7.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.7.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.7.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.7.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.7.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.7.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.7.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.7.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.7.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.7.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.7.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.7.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.7.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.7.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.7.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.7.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.7.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.7.layer_norm1.weight", + "backbone.text_model.encoder.layers.7.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.7.layer_norm1.bias", + "backbone.text_model.encoder.layers.7.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.7.mlp.fc1.weight", + "backbone.text_model.encoder.layers.7.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.7.mlp.fc1.bias", + "backbone.text_model.encoder.layers.7.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.7.mlp.fc2.weight", + "backbone.text_model.encoder.layers.7.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.7.mlp.fc2.bias", + "backbone.text_model.encoder.layers.7.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.7.layer_norm2.weight", + "backbone.text_model.encoder.layers.7.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.7.layer_norm2.bias", + "backbone.text_model.encoder.layers.8.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.8.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.8.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.8.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.8.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.8.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.8.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.8.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.8.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.8.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.8.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.8.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.8.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.8.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.8.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.8.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.8.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.8.layer_norm1.weight", + "backbone.text_model.encoder.layers.8.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.8.layer_norm1.bias", + "backbone.text_model.encoder.layers.8.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.8.mlp.fc1.weight", + "backbone.text_model.encoder.layers.8.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.8.mlp.fc1.bias", + "backbone.text_model.encoder.layers.8.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.8.mlp.fc2.weight", + "backbone.text_model.encoder.layers.8.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.8.mlp.fc2.bias", + "backbone.text_model.encoder.layers.8.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.8.layer_norm2.weight", + "backbone.text_model.encoder.layers.8.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.8.layer_norm2.bias", + "backbone.text_model.encoder.layers.9.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.9.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.9.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.9.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.9.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.9.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.9.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.9.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.9.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.9.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.9.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.9.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.9.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.9.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.9.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.9.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.9.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.9.layer_norm1.weight", + "backbone.text_model.encoder.layers.9.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.9.layer_norm1.bias", + "backbone.text_model.encoder.layers.9.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.9.mlp.fc1.weight", + "backbone.text_model.encoder.layers.9.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.9.mlp.fc1.bias", + "backbone.text_model.encoder.layers.9.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.9.mlp.fc2.weight", + "backbone.text_model.encoder.layers.9.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.9.mlp.fc2.bias", + "backbone.text_model.encoder.layers.9.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.9.layer_norm2.weight", + "backbone.text_model.encoder.layers.9.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.9.layer_norm2.bias", + "backbone.text_model.encoder.layers.10.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.10.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.10.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.10.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.10.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.10.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.10.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.10.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.10.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.10.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.10.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.10.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.10.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.10.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.10.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.10.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.10.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.10.layer_norm1.weight", + "backbone.text_model.encoder.layers.10.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.10.layer_norm1.bias", + "backbone.text_model.encoder.layers.10.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.10.mlp.fc1.weight", + "backbone.text_model.encoder.layers.10.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.10.mlp.fc1.bias", + "backbone.text_model.encoder.layers.10.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.10.mlp.fc2.weight", + "backbone.text_model.encoder.layers.10.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.10.mlp.fc2.bias", + "backbone.text_model.encoder.layers.10.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.10.layer_norm2.weight", + "backbone.text_model.encoder.layers.10.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.10.layer_norm2.bias", + "backbone.text_model.encoder.layers.11.self_attn.k_proj.weight": "trunk.backbone.text_model.encoder.layers.11.self_attn.k_proj.weight", + "backbone.text_model.encoder.layers.11.self_attn.k_proj.bias": "trunk.backbone.text_model.encoder.layers.11.self_attn.k_proj.bias", + "backbone.text_model.encoder.layers.11.self_attn.v_proj.weight": "trunk.backbone.text_model.encoder.layers.11.self_attn.v_proj.weight", + "backbone.text_model.encoder.layers.11.self_attn.v_proj.bias": "trunk.backbone.text_model.encoder.layers.11.self_attn.v_proj.bias", + "backbone.text_model.encoder.layers.11.self_attn.q_proj.weight": "trunk.backbone.text_model.encoder.layers.11.self_attn.q_proj.weight", + "backbone.text_model.encoder.layers.11.self_attn.q_proj.bias": "trunk.backbone.text_model.encoder.layers.11.self_attn.q_proj.bias", + "backbone.text_model.encoder.layers.11.self_attn.out_proj.weight": "trunk.backbone.text_model.encoder.layers.11.self_attn.out_proj.weight", + "backbone.text_model.encoder.layers.11.self_attn.out_proj.bias": "trunk.backbone.text_model.encoder.layers.11.self_attn.out_proj.bias", + "backbone.text_model.encoder.layers.11.layer_norm1.weight": "trunk.backbone.text_model.encoder.layers.11.layer_norm1.weight", + "backbone.text_model.encoder.layers.11.layer_norm1.bias": "trunk.backbone.text_model.encoder.layers.11.layer_norm1.bias", + "backbone.text_model.encoder.layers.11.mlp.fc1.weight": "trunk.backbone.text_model.encoder.layers.11.mlp.fc1.weight", + "backbone.text_model.encoder.layers.11.mlp.fc1.bias": "trunk.backbone.text_model.encoder.layers.11.mlp.fc1.bias", + "backbone.text_model.encoder.layers.11.mlp.fc2.weight": "trunk.backbone.text_model.encoder.layers.11.mlp.fc2.weight", + "backbone.text_model.encoder.layers.11.mlp.fc2.bias": "trunk.backbone.text_model.encoder.layers.11.mlp.fc2.bias", + "backbone.text_model.encoder.layers.11.layer_norm2.weight": "trunk.backbone.text_model.encoder.layers.11.layer_norm2.weight", + "backbone.text_model.encoder.layers.11.layer_norm2.bias": "trunk.backbone.text_model.encoder.layers.11.layer_norm2.bias", + "backbone.text_model.final_layer_norm.weight": "trunk.backbone.text_model.final_layer_norm.weight", + "backbone.text_model.final_layer_norm.bias": "trunk.backbone.text_model.final_layer_norm.bias", + "backbone.visual_projection.weight": "trunk.backbone.visual_projection.weight", + "backbone.text_projection.weight": "trunk.backbone.text_projection.weight", + "fusion.camera_embedding.weight": "trunk.fusion.camera_embedding.weight", + "fusion.cross_view_transformer.layers.0.self_attn.in_proj_weight": "trunk.fusion.cross_view_transformer.layers.0.self_attn.in_proj_weight", + "fusion.cross_view_transformer.layers.0.self_attn.in_proj_bias": "trunk.fusion.cross_view_transformer.layers.0.self_attn.in_proj_bias", + "fusion.cross_view_transformer.layers.0.self_attn.out_proj.weight": "trunk.fusion.cross_view_transformer.layers.0.self_attn.out_proj.weight", + "fusion.cross_view_transformer.layers.0.self_attn.out_proj.bias": "trunk.fusion.cross_view_transformer.layers.0.self_attn.out_proj.bias", + "fusion.cross_view_transformer.layers.0.linear1.weight": "trunk.fusion.cross_view_transformer.layers.0.linear1.weight", + "fusion.cross_view_transformer.layers.0.linear1.bias": "trunk.fusion.cross_view_transformer.layers.0.linear1.bias", + "fusion.cross_view_transformer.layers.0.linear2.weight": "trunk.fusion.cross_view_transformer.layers.0.linear2.weight", + "fusion.cross_view_transformer.layers.0.linear2.bias": "trunk.fusion.cross_view_transformer.layers.0.linear2.bias", + "fusion.cross_view_transformer.layers.0.norm1.weight": "trunk.fusion.cross_view_transformer.layers.0.norm1.weight", + "fusion.cross_view_transformer.layers.0.norm1.bias": "trunk.fusion.cross_view_transformer.layers.0.norm1.bias", + "fusion.cross_view_transformer.layers.0.norm2.weight": "trunk.fusion.cross_view_transformer.layers.0.norm2.weight", + "fusion.cross_view_transformer.layers.0.norm2.bias": "trunk.fusion.cross_view_transformer.layers.0.norm2.bias", + "fusion.cross_view_transformer.layers.1.self_attn.in_proj_weight": "trunk.fusion.cross_view_transformer.layers.1.self_attn.in_proj_weight", + "fusion.cross_view_transformer.layers.1.self_attn.in_proj_bias": "trunk.fusion.cross_view_transformer.layers.1.self_attn.in_proj_bias", + "fusion.cross_view_transformer.layers.1.self_attn.out_proj.weight": "trunk.fusion.cross_view_transformer.layers.1.self_attn.out_proj.weight", + "fusion.cross_view_transformer.layers.1.self_attn.out_proj.bias": "trunk.fusion.cross_view_transformer.layers.1.self_attn.out_proj.bias", + "fusion.cross_view_transformer.layers.1.linear1.weight": "trunk.fusion.cross_view_transformer.layers.1.linear1.weight", + "fusion.cross_view_transformer.layers.1.linear1.bias": "trunk.fusion.cross_view_transformer.layers.1.linear1.bias", + "fusion.cross_view_transformer.layers.1.linear2.weight": "trunk.fusion.cross_view_transformer.layers.1.linear2.weight", + "fusion.cross_view_transformer.layers.1.linear2.bias": "trunk.fusion.cross_view_transformer.layers.1.linear2.bias", + "fusion.cross_view_transformer.layers.1.norm1.weight": "trunk.fusion.cross_view_transformer.layers.1.norm1.weight", + "fusion.cross_view_transformer.layers.1.norm1.bias": "trunk.fusion.cross_view_transformer.layers.1.norm1.bias", + "fusion.cross_view_transformer.layers.1.norm2.weight": "trunk.fusion.cross_view_transformer.layers.1.norm2.weight", + "fusion.cross_view_transformer.layers.1.norm2.bias": "trunk.fusion.cross_view_transformer.layers.1.norm2.bias", + "fusion.cross_view_transformer.layers.2.self_attn.in_proj_weight": "trunk.fusion.cross_view_transformer.layers.2.self_attn.in_proj_weight", + "fusion.cross_view_transformer.layers.2.self_attn.in_proj_bias": "trunk.fusion.cross_view_transformer.layers.2.self_attn.in_proj_bias", + "fusion.cross_view_transformer.layers.2.self_attn.out_proj.weight": "trunk.fusion.cross_view_transformer.layers.2.self_attn.out_proj.weight", + "fusion.cross_view_transformer.layers.2.self_attn.out_proj.bias": "trunk.fusion.cross_view_transformer.layers.2.self_attn.out_proj.bias", + "fusion.cross_view_transformer.layers.2.linear1.weight": "trunk.fusion.cross_view_transformer.layers.2.linear1.weight", + "fusion.cross_view_transformer.layers.2.linear1.bias": "trunk.fusion.cross_view_transformer.layers.2.linear1.bias", + "fusion.cross_view_transformer.layers.2.linear2.weight": "trunk.fusion.cross_view_transformer.layers.2.linear2.weight", + "fusion.cross_view_transformer.layers.2.linear2.bias": "trunk.fusion.cross_view_transformer.layers.2.linear2.bias", + "fusion.cross_view_transformer.layers.2.norm1.weight": "trunk.fusion.cross_view_transformer.layers.2.norm1.weight", + "fusion.cross_view_transformer.layers.2.norm1.bias": "trunk.fusion.cross_view_transformer.layers.2.norm1.bias", + "fusion.cross_view_transformer.layers.2.norm2.weight": "trunk.fusion.cross_view_transformer.layers.2.norm2.weight", + "fusion.cross_view_transformer.layers.2.norm2.bias": "trunk.fusion.cross_view_transformer.layers.2.norm2.bias", + "fusion.cross_view_transformer.layers.3.self_attn.in_proj_weight": "trunk.fusion.cross_view_transformer.layers.3.self_attn.in_proj_weight", + "fusion.cross_view_transformer.layers.3.self_attn.in_proj_bias": "trunk.fusion.cross_view_transformer.layers.3.self_attn.in_proj_bias", + "fusion.cross_view_transformer.layers.3.self_attn.out_proj.weight": "trunk.fusion.cross_view_transformer.layers.3.self_attn.out_proj.weight", + "fusion.cross_view_transformer.layers.3.self_attn.out_proj.bias": "trunk.fusion.cross_view_transformer.layers.3.self_attn.out_proj.bias", + "fusion.cross_view_transformer.layers.3.linear1.weight": "trunk.fusion.cross_view_transformer.layers.3.linear1.weight", + "fusion.cross_view_transformer.layers.3.linear1.bias": "trunk.fusion.cross_view_transformer.layers.3.linear1.bias", + "fusion.cross_view_transformer.layers.3.linear2.weight": "trunk.fusion.cross_view_transformer.layers.3.linear2.weight", + "fusion.cross_view_transformer.layers.3.linear2.bias": "trunk.fusion.cross_view_transformer.layers.3.linear2.bias", + "fusion.cross_view_transformer.layers.3.norm1.weight": "trunk.fusion.cross_view_transformer.layers.3.norm1.weight", + "fusion.cross_view_transformer.layers.3.norm1.bias": "trunk.fusion.cross_view_transformer.layers.3.norm1.bias", + "fusion.cross_view_transformer.layers.3.norm2.weight": "trunk.fusion.cross_view_transformer.layers.3.norm2.weight", + "fusion.cross_view_transformer.layers.3.norm2.bias": "trunk.fusion.cross_view_transformer.layers.3.norm2.bias", + "fusion.geometry_fusion.attn.in_proj_weight": "trunk.fusion.geometry_fusion.attn.in_proj_weight", + "fusion.geometry_fusion.attn.in_proj_bias": "trunk.fusion.geometry_fusion.attn.in_proj_bias", + "fusion.geometry_fusion.attn.out_proj.weight": "trunk.fusion.geometry_fusion.attn.out_proj.weight", + "fusion.geometry_fusion.attn.out_proj.bias": "trunk.fusion.geometry_fusion.attn.out_proj.bias", + "fusion.geometry_fusion.gate.0.weight": "trunk.fusion.geometry_fusion.gate.0.weight", + "fusion.geometry_fusion.gate.0.bias": "trunk.fusion.geometry_fusion.gate.0.bias", + "fusion.geometry_fusion.gate.1.weight": "trunk.fusion.geometry_fusion.gate.1.weight", + "fusion.geometry_fusion.gate.1.bias": "trunk.fusion.geometry_fusion.gate.1.bias", + "fusion.geometry_fusion.gate.3.weight": "trunk.fusion.geometry_fusion.gate.3.weight", + "fusion.geometry_fusion.gate.3.bias": "trunk.fusion.geometry_fusion.gate.3.bias", + "fusion.geometry_fusion.out.0.weight": "trunk.fusion.geometry_fusion.out.0.weight", + "fusion.geometry_fusion.out.0.bias": "trunk.fusion.geometry_fusion.out.0.bias", + "fusion.geometry_fusion.out.1.weight": "trunk.fusion.geometry_fusion.out.1.weight", + "fusion.geometry_fusion.out.1.bias": "trunk.fusion.geometry_fusion.out.1.bias", + "fusion.proprio_adapter.0.weight": "trunk.fusion.proprio_adapter.0.weight", + "fusion.proprio_adapter.0.bias": "trunk.fusion.proprio_adapter.0.bias", + "fusion.proprio_adapter.1.weight": "trunk.fusion.proprio_adapter.1.weight", + "fusion.proprio_adapter.1.bias": "trunk.fusion.proprio_adapter.1.bias", + "memory.uncertainty_head.0.weight": "trunk.memory.uncertainty_head.0.weight", + "memory.uncertainty_head.0.bias": "trunk.memory.uncertainty_head.0.bias", + "memory.uncertainty_head.1.weight": "trunk.memory.uncertainty_head.1.weight", + "memory.uncertainty_head.1.bias": "trunk.memory.uncertainty_head.1.bias", + "decoder.query_embed.weight": "trunk.decoder.query_embed.weight", + "decoder.coordination.0.weight": "trunk.decoder.coordination.0.weight", + "decoder.coordination.0.bias": "trunk.decoder.coordination.0.bias", + "decoder.coordination.1.weight": "trunk.decoder.coordination.1.weight", + "decoder.coordination.1.bias": "trunk.decoder.coordination.1.bias", + "decoder.coordination.3.weight": "trunk.decoder.coordination.3.weight", + "decoder.coordination.3.bias": "trunk.decoder.coordination.3.bias", + "elastic_state_head.interaction_queries": "adapter.state_head.interaction_queries", + "elastic_state_head.interaction_attention.in_proj_weight": "adapter.state_head.interaction_attention.in_proj_weight", + "elastic_state_head.interaction_attention.in_proj_bias": "adapter.state_head.interaction_attention.in_proj_bias", + "elastic_state_head.interaction_attention.out_proj.weight": "adapter.state_head.interaction_attention.out_proj.weight", + "elastic_state_head.interaction_attention.out_proj.bias": "adapter.state_head.interaction_attention.out_proj.bias", + "elastic_state_head.interaction_mlp.0.weight": "adapter.state_head.interaction_mlp.0.weight", + "elastic_state_head.interaction_mlp.0.bias": "adapter.state_head.interaction_mlp.0.bias", + "elastic_state_head.interaction_mlp.1.weight": "adapter.state_head.interaction_mlp.1.weight", + "elastic_state_head.interaction_mlp.1.bias": "adapter.state_head.interaction_mlp.1.bias", + "elastic_state_head.interaction_mlp.3.weight": "adapter.state_head.interaction_mlp.3.weight", + "elastic_state_head.interaction_mlp.3.bias": "adapter.state_head.interaction_mlp.3.bias", + "elastic_state_head.decoder.field_queries": "adapter.state_head.decoder.field_queries", + "elastic_state_head.decoder.field_attention.in_proj_weight": "adapter.state_head.decoder.field_attention.in_proj_weight", + "elastic_state_head.decoder.field_attention.in_proj_bias": "adapter.state_head.decoder.field_attention.in_proj_bias", + "elastic_state_head.decoder.field_attention.out_proj.weight": "adapter.state_head.decoder.field_attention.out_proj.weight", + "elastic_state_head.decoder.field_attention.out_proj.bias": "adapter.state_head.decoder.field_attention.out_proj.bias", + "elastic_state_head.decoder.field_mlp.0.weight": "adapter.state_head.decoder.field_mlp.0.weight", + "elastic_state_head.decoder.field_mlp.0.bias": "adapter.state_head.decoder.field_mlp.0.bias", + "elastic_state_head.decoder.field_mlp.1.weight": "adapter.state_head.decoder.field_mlp.1.weight", + "elastic_state_head.decoder.field_mlp.1.bias": "adapter.state_head.decoder.field_mlp.1.bias", + "elastic_state_head.decoder.field_mlp.3.weight": "adapter.state_head.decoder.field_mlp.3.weight", + "elastic_state_head.decoder.field_mlp.3.bias": "adapter.state_head.decoder.field_mlp.3.bias", + "elastic_state_head.decoder.summary_proj.0.weight": "adapter.state_head.decoder.summary_proj.0.weight", + "elastic_state_head.decoder.summary_proj.0.bias": "adapter.state_head.decoder.summary_proj.0.bias", + "elastic_state_head.decoder.summary_proj.1.weight": "adapter.state_head.decoder.summary_proj.1.weight", + "elastic_state_head.decoder.summary_proj.1.bias": "adapter.state_head.decoder.summary_proj.1.bias", + "elastic_state_head.decoder.phase_head.0.weight": "adapter.state_head.decoder.phase_head.0.weight", + "elastic_state_head.decoder.phase_head.0.bias": "adapter.state_head.decoder.phase_head.0.bias", + "elastic_state_head.decoder.phase_head.1.weight": "adapter.state_head.decoder.phase_head.1.weight", + "elastic_state_head.decoder.phase_head.1.bias": "adapter.state_head.decoder.phase_head.1.bias", + "elastic_state_head.decoder.phase_head.3.weight": "adapter.state_head.decoder.phase_head.3.weight", + "elastic_state_head.decoder.phase_head.3.bias": "adapter.state_head.decoder.phase_head.3.bias", + "elastic_state_head.decoder.arm_role_head.0.weight": "adapter.state_head.decoder.arm_role_head.0.weight", + "elastic_state_head.decoder.arm_role_head.0.bias": "adapter.state_head.decoder.arm_role_head.0.bias", + "elastic_state_head.decoder.arm_role_head.1.weight": "adapter.state_head.decoder.arm_role_head.1.weight", + "elastic_state_head.decoder.arm_role_head.1.bias": "adapter.state_head.decoder.arm_role_head.1.bias", + "elastic_state_head.decoder.arm_role_head.3.weight": "adapter.state_head.decoder.arm_role_head.3.weight", + "elastic_state_head.decoder.arm_role_head.3.bias": "adapter.state_head.decoder.arm_role_head.3.bias", + "elastic_state_head.decoder.arm_identity.weight": "adapter.state_head.decoder.arm_identity.weight", + "elastic_state_head.decoder.support_mode.0.weight": "adapter.state_head.decoder.support_mode.0.weight", + "elastic_state_head.decoder.support_mode.0.bias": "adapter.state_head.decoder.support_mode.0.bias", + "elastic_state_head.decoder.support_mode.1.weight": "adapter.state_head.decoder.support_mode.1.weight", + "elastic_state_head.decoder.support_mode.1.bias": "adapter.state_head.decoder.support_mode.1.bias", + "elastic_state_head.decoder.support_mode.3.weight": "adapter.state_head.decoder.support_mode.3.weight", + "elastic_state_head.decoder.support_mode.3.bias": "adapter.state_head.decoder.support_mode.3.bias", + "elastic_state_head.decoder.access_field.weight": "adapter.state_head.decoder.access_field.weight", + "elastic_state_head.decoder.access_field.bias": "adapter.state_head.decoder.access_field.bias", + "elastic_state_head.decoder.target_belief_field.weight": "adapter.state_head.decoder.target_belief_field.weight", + "elastic_state_head.decoder.target_belief_field.bias": "adapter.state_head.decoder.target_belief_field.bias", + "elastic_state_head.decoder.visibility_field.weight": "adapter.state_head.decoder.visibility_field.weight", + "elastic_state_head.decoder.visibility_field.bias": "adapter.state_head.decoder.visibility_field.bias", + "elastic_state_head.decoder.clearance_field.weight": "adapter.state_head.decoder.clearance_field.weight", + "elastic_state_head.decoder.clearance_field.bias": "adapter.state_head.decoder.clearance_field.bias", + "elastic_state_head.decoder.occluder_contact_field.weight": "adapter.state_head.decoder.occluder_contact_field.weight", + "elastic_state_head.decoder.occluder_contact_field.bias": "adapter.state_head.decoder.occluder_contact_field.bias", + "elastic_state_head.decoder.grasp_affordance_field.weight": "adapter.state_head.decoder.grasp_affordance_field.weight", + "elastic_state_head.decoder.grasp_affordance_field.bias": "adapter.state_head.decoder.grasp_affordance_field.bias", + "elastic_state_head.decoder.support_stability_field.weight": "adapter.state_head.decoder.support_stability_field.weight", + "elastic_state_head.decoder.support_stability_field.bias": "adapter.state_head.decoder.support_stability_field.bias", + "elastic_state_head.decoder.persistence_field.weight": "adapter.state_head.decoder.persistence_field.weight", + "elastic_state_head.decoder.persistence_field.bias": "adapter.state_head.decoder.persistence_field.bias", + "elastic_state_head.decoder.reocclusion_field.weight": "adapter.state_head.decoder.reocclusion_field.weight", + "elastic_state_head.decoder.reocclusion_field.bias": "adapter.state_head.decoder.reocclusion_field.bias", + "elastic_state_head.decoder.disturbance_field.weight": "adapter.state_head.decoder.disturbance_field.weight", + "elastic_state_head.decoder.disturbance_field.bias": "adapter.state_head.decoder.disturbance_field.bias", + "elastic_state_head.decoder.uncertainty_field.weight": "adapter.state_head.decoder.uncertainty_field.weight", + "elastic_state_head.decoder.uncertainty_field.bias": "adapter.state_head.decoder.uncertainty_field.bias", + "elastic_state_head.decoder.reocclusion_head.0.weight": "adapter.state_head.decoder.reocclusion_head.0.weight", + "elastic_state_head.decoder.reocclusion_head.0.bias": "adapter.state_head.decoder.reocclusion_head.0.bias", + "elastic_state_head.decoder.reocclusion_head.1.weight": "adapter.state_head.decoder.reocclusion_head.1.weight", + "elastic_state_head.decoder.reocclusion_head.1.bias": "adapter.state_head.decoder.reocclusion_head.1.bias", + "elastic_state_head.decoder.reocclusion_head.3.weight": "adapter.state_head.decoder.reocclusion_head.3.weight", + "elastic_state_head.decoder.reocclusion_head.3.bias": "adapter.state_head.decoder.reocclusion_head.3.bias", + "elastic_state_head.decoder.task_embedding.weight": "adapter.state_head.decoder.task_embedding.weight", + "elastic_state_head.decoder.task_field_affine.weight": "adapter.state_head.decoder.task_field_affine.weight", + "elastic_state_head.decoder.task_field_affine.bias": "adapter.state_head.decoder.task_field_affine.bias", + "elastic_state_head.decoder.task_summary_adapter.0.weight": "adapter.state_head.decoder.task_summary_adapter.0.weight", + "elastic_state_head.decoder.task_summary_adapter.0.bias": "adapter.state_head.decoder.task_summary_adapter.0.bias", + "elastic_state_head.decoder.task_summary_adapter.1.weight": "adapter.state_head.decoder.task_summary_adapter.1.weight", + "elastic_state_head.decoder.task_summary_adapter.1.bias": "adapter.state_head.decoder.task_summary_adapter.1.bias", + "elastic_state_head.decoder.task_phase_head.weight": "adapter.state_head.decoder.task_phase_head.weight", + "elastic_state_head.decoder.task_phase_head.bias": "adapter.state_head.decoder.task_phase_head.bias", + "elastic_state_head.decoder.task_support_head.weight": "adapter.state_head.decoder.task_support_head.weight", + "elastic_state_head.decoder.task_support_head.bias": "adapter.state_head.decoder.task_support_head.bias", + "elastic_state_head.decoder.task_reocclusion_head.weight": "adapter.state_head.decoder.task_reocclusion_head.weight", + "elastic_state_head.decoder.task_reocclusion_head.bias": "adapter.state_head.decoder.task_reocclusion_head.bias", + "elastic_state_head.decoder.task_metric_head.0.weight": "adapter.state_head.decoder.task_metric_head.0.weight", + "elastic_state_head.decoder.task_metric_head.0.bias": "adapter.state_head.decoder.task_metric_head.0.bias", + "elastic_state_head.decoder.task_metric_head.1.weight": "adapter.state_head.decoder.task_metric_head.1.weight", + "elastic_state_head.decoder.task_metric_head.1.bias": "adapter.state_head.decoder.task_metric_head.1.bias", + "elastic_state_head.decoder.task_metric_head.3.weight": "adapter.state_head.decoder.task_metric_head.3.weight", + "elastic_state_head.decoder.task_metric_head.3.bias": "adapter.state_head.decoder.task_metric_head.3.bias", + "world_model.state_encoder.0.weight": "adapter.transition_model.state_encoder.0.weight", + "world_model.state_encoder.0.bias": "adapter.transition_model.state_encoder.0.bias", + "world_model.state_encoder.1.weight": "adapter.transition_model.state_encoder.1.weight", + "world_model.state_encoder.1.bias": "adapter.transition_model.state_encoder.1.bias", + "world_model.action_encoder.0.weight": "adapter.transition_model.action_encoder.0.weight", + "world_model.action_encoder.0.bias": "adapter.transition_model.action_encoder.0.bias", + "world_model.action_encoder.1.weight": "adapter.transition_model.action_encoder.1.weight", + "world_model.action_encoder.1.bias": "adapter.transition_model.action_encoder.1.bias", + "world_model.transition.weight_hh": "adapter.transition_model.transition.weight_hh_l0", + "world_model.transition.bias_ih": "adapter.transition_model.transition.bias_ih_l0", + "world_model.transition.bias_hh": "adapter.transition_model.transition.bias_hh_l0", + "planner.residual.trunk.1.bias": "adapter.planner.reranker.network.1.bias", + "planner.residual.trunk.3.weight": "adapter.planner.reranker.network.3.weight", + "planner.residual.trunk.3.bias": "adapter.planner.reranker.network.3.bias", + "planner.residual.success_head.weight": "adapter.planner.reranker.success_head.weight", + "planner.residual.success_head.bias": "adapter.planner.reranker.success_head.bias", + "planner.residual.risk_head.weight": "adapter.planner.reranker.risk_head.weight", + "planner.residual.risk_head.bias": "adapter.planner.reranker.risk_head.bias", + "planner.residual.residual_head.weight": "adapter.planner.reranker.score_head.weight", + "planner.residual.residual_head.bias": "adapter.planner.reranker.score_head.bias" + }, + "missing_keys": [ + "trunk.memory.gru.weight_ih_l0", + "trunk.memory.gru.weight_hh_l0", + "trunk.memory.gru.bias_ih_l0", + "trunk.memory.gru.bias_hh_l0", + "trunk.memory.gru.weight_ih_l1", + "trunk.memory.gru.weight_hh_l1", + "trunk.memory.gru.bias_ih_l1", + "trunk.memory.gru.bias_hh_l1", + "trunk.memory.token_proj.0.weight", + "trunk.memory.token_proj.0.bias", + "trunk.memory.token_proj.1.weight", + "trunk.memory.token_proj.1.bias", + "trunk.memory.action_proj.0.weight", + "trunk.memory.action_proj.0.bias", + "trunk.memory.action_proj.1.weight", + "trunk.memory.action_proj.1.bias", + "trunk.decoder.actor_role_bias", + "trunk.decoder.revealer_decoder.layers.0.self_attn.in_proj_weight", + "trunk.decoder.revealer_decoder.layers.0.self_attn.in_proj_bias", + "trunk.decoder.revealer_decoder.layers.0.self_attn.out_proj.weight", + "trunk.decoder.revealer_decoder.layers.0.self_attn.out_proj.bias", + "trunk.decoder.revealer_decoder.layers.0.multihead_attn.in_proj_weight", + "trunk.decoder.revealer_decoder.layers.0.multihead_attn.in_proj_bias", + "trunk.decoder.revealer_decoder.layers.0.multihead_attn.out_proj.weight", + "trunk.decoder.revealer_decoder.layers.0.multihead_attn.out_proj.bias", + "trunk.decoder.revealer_decoder.layers.0.linear1.weight", + "trunk.decoder.revealer_decoder.layers.0.linear1.bias", + "trunk.decoder.revealer_decoder.layers.0.linear2.weight", + "trunk.decoder.revealer_decoder.layers.0.linear2.bias", + "trunk.decoder.revealer_decoder.layers.0.norm1.weight", + "trunk.decoder.revealer_decoder.layers.0.norm1.bias", + "trunk.decoder.revealer_decoder.layers.0.norm2.weight", + "trunk.decoder.revealer_decoder.layers.0.norm2.bias", + "trunk.decoder.revealer_decoder.layers.0.norm3.weight", + "trunk.decoder.revealer_decoder.layers.0.norm3.bias", + "trunk.decoder.revealer_decoder.layers.1.self_attn.in_proj_weight", + "trunk.decoder.revealer_decoder.layers.1.self_attn.in_proj_bias", + "trunk.decoder.revealer_decoder.layers.1.self_attn.out_proj.weight", + "trunk.decoder.revealer_decoder.layers.1.self_attn.out_proj.bias", + "trunk.decoder.revealer_decoder.layers.1.multihead_attn.in_proj_weight", + "trunk.decoder.revealer_decoder.layers.1.multihead_attn.in_proj_bias", + "trunk.decoder.revealer_decoder.layers.1.multihead_attn.out_proj.weight", + "trunk.decoder.revealer_decoder.layers.1.multihead_attn.out_proj.bias", + "trunk.decoder.revealer_decoder.layers.1.linear1.weight", + "trunk.decoder.revealer_decoder.layers.1.linear1.bias", + "trunk.decoder.revealer_decoder.layers.1.linear2.weight", + "trunk.decoder.revealer_decoder.layers.1.linear2.bias", + "trunk.decoder.revealer_decoder.layers.1.norm1.weight", + "trunk.decoder.revealer_decoder.layers.1.norm1.bias", + "trunk.decoder.revealer_decoder.layers.1.norm2.weight", + "trunk.decoder.revealer_decoder.layers.1.norm2.bias", + "trunk.decoder.revealer_decoder.layers.1.norm3.weight", + "trunk.decoder.revealer_decoder.layers.1.norm3.bias", + "trunk.decoder.revealer_decoder.layers.2.self_attn.in_proj_weight", + "trunk.decoder.revealer_decoder.layers.2.self_attn.in_proj_bias", + "trunk.decoder.revealer_decoder.layers.2.self_attn.out_proj.weight", + "trunk.decoder.revealer_decoder.layers.2.self_attn.out_proj.bias", + "trunk.decoder.revealer_decoder.layers.2.multihead_attn.in_proj_weight", + "trunk.decoder.revealer_decoder.layers.2.multihead_attn.in_proj_bias", + "trunk.decoder.revealer_decoder.layers.2.multihead_attn.out_proj.weight", + "trunk.decoder.revealer_decoder.layers.2.multihead_attn.out_proj.bias", + "trunk.decoder.revealer_decoder.layers.2.linear1.weight", + "trunk.decoder.revealer_decoder.layers.2.linear1.bias", + "trunk.decoder.revealer_decoder.layers.2.linear2.weight", + "trunk.decoder.revealer_decoder.layers.2.linear2.bias", + "trunk.decoder.revealer_decoder.layers.2.norm1.weight", + "trunk.decoder.revealer_decoder.layers.2.norm1.bias", + "trunk.decoder.revealer_decoder.layers.2.norm2.weight", + "trunk.decoder.revealer_decoder.layers.2.norm2.bias", + "trunk.decoder.revealer_decoder.layers.2.norm3.weight", + "trunk.decoder.revealer_decoder.layers.2.norm3.bias", + "trunk.decoder.revealer_decoder.layers.3.self_attn.in_proj_weight", + "trunk.decoder.revealer_decoder.layers.3.self_attn.in_proj_bias", + "trunk.decoder.revealer_decoder.layers.3.self_attn.out_proj.weight", + "trunk.decoder.revealer_decoder.layers.3.self_attn.out_proj.bias", + "trunk.decoder.revealer_decoder.layers.3.multihead_attn.in_proj_weight", + "trunk.decoder.revealer_decoder.layers.3.multihead_attn.in_proj_bias", + "trunk.decoder.revealer_decoder.layers.3.multihead_attn.out_proj.weight", + "trunk.decoder.revealer_decoder.layers.3.multihead_attn.out_proj.bias", + "trunk.decoder.revealer_decoder.layers.3.linear1.weight", + "trunk.decoder.revealer_decoder.layers.3.linear1.bias", + "trunk.decoder.revealer_decoder.layers.3.linear2.weight", + "trunk.decoder.revealer_decoder.layers.3.linear2.bias", + "trunk.decoder.revealer_decoder.layers.3.norm1.weight", + "trunk.decoder.revealer_decoder.layers.3.norm1.bias", + "trunk.decoder.revealer_decoder.layers.3.norm2.weight", + "trunk.decoder.revealer_decoder.layers.3.norm2.bias", + "trunk.decoder.revealer_decoder.layers.3.norm3.weight", + "trunk.decoder.revealer_decoder.layers.3.norm3.bias", + "trunk.decoder.actor_decoder.layers.0.self_attn.in_proj_weight", + "trunk.decoder.actor_decoder.layers.0.self_attn.in_proj_bias", + "trunk.decoder.actor_decoder.layers.0.self_attn.out_proj.weight", + "trunk.decoder.actor_decoder.layers.0.self_attn.out_proj.bias", + "trunk.decoder.actor_decoder.layers.0.multihead_attn.in_proj_weight", + "trunk.decoder.actor_decoder.layers.0.multihead_attn.in_proj_bias", + "trunk.decoder.actor_decoder.layers.0.multihead_attn.out_proj.weight", + "trunk.decoder.actor_decoder.layers.0.multihead_attn.out_proj.bias", + "trunk.decoder.actor_decoder.layers.0.linear1.weight", + "trunk.decoder.actor_decoder.layers.0.linear1.bias", + "trunk.decoder.actor_decoder.layers.0.linear2.weight", + "trunk.decoder.actor_decoder.layers.0.linear2.bias", + "trunk.decoder.actor_decoder.layers.0.norm1.weight", + "trunk.decoder.actor_decoder.layers.0.norm1.bias", + "trunk.decoder.actor_decoder.layers.0.norm2.weight", + "trunk.decoder.actor_decoder.layers.0.norm2.bias", + "trunk.decoder.actor_decoder.layers.0.norm3.weight", + "trunk.decoder.actor_decoder.layers.0.norm3.bias", + "trunk.decoder.actor_decoder.layers.1.self_attn.in_proj_weight", + "trunk.decoder.actor_decoder.layers.1.self_attn.in_proj_bias", + "trunk.decoder.actor_decoder.layers.1.self_attn.out_proj.weight", + "trunk.decoder.actor_decoder.layers.1.self_attn.out_proj.bias", + "trunk.decoder.actor_decoder.layers.1.multihead_attn.in_proj_weight", + "trunk.decoder.actor_decoder.layers.1.multihead_attn.in_proj_bias", + "trunk.decoder.actor_decoder.layers.1.multihead_attn.out_proj.weight", + "trunk.decoder.actor_decoder.layers.1.multihead_attn.out_proj.bias", + "trunk.decoder.actor_decoder.layers.1.linear1.weight", + "trunk.decoder.actor_decoder.layers.1.linear1.bias", + "trunk.decoder.actor_decoder.layers.1.linear2.weight", + "trunk.decoder.actor_decoder.layers.1.linear2.bias", + "trunk.decoder.actor_decoder.layers.1.norm1.weight", + "trunk.decoder.actor_decoder.layers.1.norm1.bias", + "trunk.decoder.actor_decoder.layers.1.norm2.weight", + "trunk.decoder.actor_decoder.layers.1.norm2.bias", + "trunk.decoder.actor_decoder.layers.1.norm3.weight", + "trunk.decoder.actor_decoder.layers.1.norm3.bias", + "trunk.decoder.actor_decoder.layers.2.self_attn.in_proj_weight", + "trunk.decoder.actor_decoder.layers.2.self_attn.in_proj_bias", + "trunk.decoder.actor_decoder.layers.2.self_attn.out_proj.weight", + "trunk.decoder.actor_decoder.layers.2.self_attn.out_proj.bias", + "trunk.decoder.actor_decoder.layers.2.multihead_attn.in_proj_weight", + "trunk.decoder.actor_decoder.layers.2.multihead_attn.in_proj_bias", + "trunk.decoder.actor_decoder.layers.2.multihead_attn.out_proj.weight", + "trunk.decoder.actor_decoder.layers.2.multihead_attn.out_proj.bias", + "trunk.decoder.actor_decoder.layers.2.linear1.weight", + "trunk.decoder.actor_decoder.layers.2.linear1.bias", + "trunk.decoder.actor_decoder.layers.2.linear2.weight", + "trunk.decoder.actor_decoder.layers.2.linear2.bias", + "trunk.decoder.actor_decoder.layers.2.norm1.weight", + "trunk.decoder.actor_decoder.layers.2.norm1.bias", + "trunk.decoder.actor_decoder.layers.2.norm2.weight", + "trunk.decoder.actor_decoder.layers.2.norm2.bias", + "trunk.decoder.actor_decoder.layers.2.norm3.weight", + "trunk.decoder.actor_decoder.layers.2.norm3.bias", + "trunk.decoder.actor_decoder.layers.3.self_attn.in_proj_weight", + "trunk.decoder.actor_decoder.layers.3.self_attn.in_proj_bias", + "trunk.decoder.actor_decoder.layers.3.self_attn.out_proj.weight", + "trunk.decoder.actor_decoder.layers.3.self_attn.out_proj.bias", + "trunk.decoder.actor_decoder.layers.3.multihead_attn.in_proj_weight", + "trunk.decoder.actor_decoder.layers.3.multihead_attn.in_proj_bias", + "trunk.decoder.actor_decoder.layers.3.multihead_attn.out_proj.weight", + "trunk.decoder.actor_decoder.layers.3.multihead_attn.out_proj.bias", + "trunk.decoder.actor_decoder.layers.3.linear1.weight", + "trunk.decoder.actor_decoder.layers.3.linear1.bias", + "trunk.decoder.actor_decoder.layers.3.linear2.weight", + "trunk.decoder.actor_decoder.layers.3.linear2.bias", + "trunk.decoder.actor_decoder.layers.3.norm1.weight", + "trunk.decoder.actor_decoder.layers.3.norm1.bias", + "trunk.decoder.actor_decoder.layers.3.norm2.weight", + "trunk.decoder.actor_decoder.layers.3.norm2.bias", + "trunk.decoder.actor_decoder.layers.3.norm3.weight", + "trunk.decoder.actor_decoder.layers.3.norm3.bias", + "trunk.decoder.revealer_mean.weight", + "trunk.decoder.revealer_mean.bias", + "trunk.decoder.revealer_log_std.weight", + "trunk.decoder.revealer_log_std.bias", + "trunk.decoder.actor_mean.weight", + "trunk.decoder.actor_mean.bias", + "trunk.decoder.actor_log_std.weight", + "trunk.decoder.actor_log_std.bias", + "trunk.decoder.proposal_score.0.weight", + "trunk.decoder.proposal_score.0.bias", + "trunk.decoder.proposal_score.1.weight", + "trunk.decoder.proposal_score.1.bias", + "adapter.proposal_prior.task_embedding.weight", + "adapter.proposal_prior.context_proj.0.weight", + "adapter.proposal_prior.context_proj.0.bias", + "adapter.proposal_prior.context_proj.1.weight", + "adapter.proposal_prior.context_proj.1.bias", + "adapter.proposal_prior.mode_score_head.weight", + "adapter.proposal_prior.mode_score_head.bias", + "adapter.proposal_prior.mode_residual_heads.0.0.weight", + "adapter.proposal_prior.mode_residual_heads.0.0.bias", + "adapter.proposal_prior.mode_residual_heads.0.1.weight", + "adapter.proposal_prior.mode_residual_heads.0.1.bias", + "adapter.proposal_prior.mode_residual_heads.0.3.weight", + "adapter.proposal_prior.mode_residual_heads.0.3.bias", + "adapter.proposal_prior.mode_residual_heads.1.0.weight", + "adapter.proposal_prior.mode_residual_heads.1.0.bias", + "adapter.proposal_prior.mode_residual_heads.1.1.weight", + "adapter.proposal_prior.mode_residual_heads.1.1.bias", + "adapter.proposal_prior.mode_residual_heads.1.3.weight", + "adapter.proposal_prior.mode_residual_heads.1.3.bias", + "adapter.proposal_prior.mode_residual_heads.2.0.weight", + "adapter.proposal_prior.mode_residual_heads.2.0.bias", + "adapter.proposal_prior.mode_residual_heads.2.1.weight", + "adapter.proposal_prior.mode_residual_heads.2.1.bias", + "adapter.proposal_prior.mode_residual_heads.2.3.weight", + "adapter.proposal_prior.mode_residual_heads.2.3.bias", + "adapter.proposal_prior.mode_residual_heads.3.0.weight", + "adapter.proposal_prior.mode_residual_heads.3.0.bias", + "adapter.proposal_prior.mode_residual_heads.3.1.weight", + "adapter.proposal_prior.mode_residual_heads.3.1.bias", + "adapter.proposal_prior.mode_residual_heads.3.3.weight", + "adapter.proposal_prior.mode_residual_heads.3.3.bias", + "adapter.proposal_prior.mode_residual_heads.4.0.weight", + "adapter.proposal_prior.mode_residual_heads.4.0.bias", + "adapter.proposal_prior.mode_residual_heads.4.1.weight", + "adapter.proposal_prior.mode_residual_heads.4.1.bias", + "adapter.proposal_prior.mode_residual_heads.4.3.weight", + "adapter.proposal_prior.mode_residual_heads.4.3.bias", + "adapter.proposal_prior.mode_residual_heads.5.0.weight", + "adapter.proposal_prior.mode_residual_heads.5.0.bias", + "adapter.proposal_prior.mode_residual_heads.5.1.weight", + "adapter.proposal_prior.mode_residual_heads.5.1.bias", + "adapter.proposal_prior.mode_residual_heads.5.3.weight", + "adapter.proposal_prior.mode_residual_heads.5.3.bias", + "adapter.proposal_prior.mode_residual_heads.6.0.weight", + "adapter.proposal_prior.mode_residual_heads.6.0.bias", + "adapter.proposal_prior.mode_residual_heads.6.1.weight", + "adapter.proposal_prior.mode_residual_heads.6.1.bias", + "adapter.proposal_prior.mode_residual_heads.6.3.weight", + "adapter.proposal_prior.mode_residual_heads.6.3.bias", + "adapter.proposal_prior.slot_embedding.weight", + "adapter.proposal_prior.slot_delta.0.weight", + "adapter.proposal_prior.slot_delta.0.bias", + "adapter.proposal_prior.slot_delta.1.weight", + "adapter.proposal_prior.slot_delta.1.bias", + "adapter.proposal_prior.slot_delta.3.weight", + "adapter.proposal_prior.slot_delta.3.bias", + "adapter.transition_model.mode_embedding.weight", + "adapter.transition_model.transition.weight_ih_l0", + "adapter.transition_model.summary_decoder.weight", + "adapter.transition_model.summary_decoder.bias", + "adapter.transition_model.access_field_head.weight", + "adapter.transition_model.access_field_head.bias", + "adapter.transition_model.support_field_head.weight", + "adapter.transition_model.support_field_head.bias", + "adapter.reveal_cache.summary_proj.0.weight", + "adapter.reveal_cache.summary_proj.0.bias", + "adapter.reveal_cache.summary_proj.1.weight", + "adapter.reveal_cache.summary_proj.1.bias", + "adapter.planner.reranker.network.0.weight", + "adapter.planner.reranker.network.0.bias", + "adapter.planner.reranker.network.1.weight" + ], + "unexpected_keys": [] + } +} \ No newline at end of file