lsnu committed on
Commit
5ce8761
·
verified ·
1 Parent(s): 6e9fe6b

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. .gitattributes +212 -0
  2. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/.gitignore +2 -0
  3. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/CACHEDIR.TAG +4 -0
  4. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/README.md +8 -0
  5. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/v/cache/lastfailed +1 -0
  6. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/v/cache/nodeids +11 -0
  7. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/artifacts/generated_configs/public_benchmark_package_v1.json +202 -0
  8. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md +114 -0
  9. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md +87 -0
  10. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/public_benchmark_package.md +102 -0
  11. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_hybrid_public_benchmark_smoke.py +142 -0
  12. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py +887 -0
  13. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/losses.py +855 -0
  14. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/trainer.py +184 -0
  15. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_summary.py +91 -0
  16. code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_tracks.py +63 -0
  17. code/VLAarchtests4_root/MODEL_AND_ARTIFACT_INDEX.md +53 -0
  18. code/VLAarchtests4_root/PUBLIC_BENCHMARK_RESULTS.md +217 -0
  19. code/VLAarchtests4_root/README.md +407 -0
  20. code/VLAarchtests4_root/docs/maniskill_pickclutter_correction_log_2026-04-01.md +187 -0
  21. code/VLAarchtests4_root/docs/minimum_sign_of_life_maniskill_pickclutter_run_2026-04-01.md +134 -0
  22. code/VLAarchtests4_root/docs/public_benchmark_progress_2026-04-01.md +91 -0
  23. code/VLAarchtests4_root/docs/public_bridge_smoke_run_log_2026-04-01.md +116 -0
  24. code/VLAarchtests4_root/setup/public_benchmark/ENVIRONMENT.md +68 -0
  25. code/VLAarchtests4_root/setup/public_benchmark/env_vars.sh +10 -0
  26. code/VLAarchtests4_root/setup/public_benchmark/gpu_short.txt +1 -0
  27. code/VLAarchtests4_root/setup/public_benchmark/hf_env.txt +35 -0
  28. code/VLAarchtests4_root/setup/public_benchmark/nvidia_smi.txt +21 -0
  29. code/VLAarchtests4_root/setup/public_benchmark/pip_freeze_python311.txt +209 -0
  30. code/VLAarchtests4_root/setup/public_benchmark/python_version.txt +1 -0
  31. code/VLAarchtests4_root/setup/public_benchmark/rlbench_env_pip_freeze.txt +181 -0
  32. code/VLAarchtests4_root/setup/public_benchmark/uname.txt +1 -0
  33. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode12/low_dim_obs.pkl +3 -0
  34. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode15/low_dim_obs.pkl +3 -0
  35. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode18/low_dim_obs.pkl +3 -0
  36. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode20/low_dim_obs.pkl +3 -0
  37. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode27/low_dim_obs.pkl +3 -0
  38. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode34/low_dim_obs.pkl +3 -0
  39. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode4/low_dim_obs.pkl +3 -0
  40. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode43/low_dim_obs.pkl +3 -0
  41. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode49/low_dim_obs.pkl +3 -0
  42. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode50/low_dim_obs.pkl +3 -0
  43. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode57/low_dim_obs.pkl +3 -0
  44. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode62/low_dim_obs.pkl +3 -0
  45. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode65/low_dim_obs.pkl +3 -0
  46. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode68/low_dim_obs.pkl +3 -0
  47. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode71/low_dim_obs.pkl +3 -0
  48. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode76/low_dim_obs.pkl +3 -0
  49. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode81/low_dim_obs.pkl +3 -0
  50. data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode86/low_dim_obs.pkl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,215 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ third_party/AnyBimanual/agents/rvt/rvt/libs/point-renderer/demo.png filter=lfs diff=lfs merge=lfs -text
37
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Baxter.ttm filter=lfs diff=lfs merge=lfs -text
38
+ third_party/AnyBimanual/third_party/PyRep/pyrep/backend/_sim_cffi.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
39
+ third_party/AnyBimanual/third_party/PyRep/examples/scene_youbot_navigation.ttt filter=lfs diff=lfs merge=lfs -text
40
+ third_party/AnyBimanual/third_party/PyRep/examples/scene_turtlebot_navigation.ttt filter=lfs diff=lfs merge=lfs -text
41
+ third_party/AnyBimanual/third_party/PyRep/examples/scene_reinforcement_learning_env.ttt filter=lfs diff=lfs merge=lfs -text
42
+ third_party/AnyBimanual/third_party/PyRep/examples/scene_baxter_pick_and_pass.ttt filter=lfs diff=lfs merge=lfs -text
43
+ third_party/AnyBimanual/third_party/PyRep/examples/scene_locobot_stack_cube.ttt filter=lfs diff=lfs merge=lfs -text
44
+ third_party/AnyBimanual/third_party/PyRep/examples/scene_panda_reach_target.ttt filter=lfs diff=lfs merge=lfs -text
45
+ third_party/AnyBimanual/third_party/RLBench/readme_files/task_grid.png filter=lfs diff=lfs merge=lfs -text
46
+ third_party/AnyBimanual/third_party/PyRep/tutorials/images/kinematics_group.png filter=lfs diff=lfs merge=lfs -text
47
+ third_party/AnyBimanual/third_party/PyRep/tutorials/images/collision_collections.png filter=lfs diff=lfs merge=lfs -text
48
+ third_party/AnyBimanual/third_party/PyRep/tests/assets/test_scene_robots.ttt filter=lfs diff=lfs merge=lfs -text
49
+ third_party/AnyBimanual/third_party/PyRep/tests/assets/test_scene.ttt filter=lfs diff=lfs merge=lfs -text
50
+ third_party/AnyBimanual/third_party/PyRep/tests/assets/test_scene_mobiles.ttt filter=lfs diff=lfs merge=lfs -text
51
+ third_party/AnyBimanual/third_party/PyRep/tests/assets/test_scene_mobiles_with_arms.ttt filter=lfs diff=lfs merge=lfs -text
52
+ third_party/AnyBimanual/third_party/PyRep/tests/assets/cracker_box/texture_map.png filter=lfs diff=lfs merge=lfs -text
53
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/mobiles/LineTracer.ttm filter=lfs diff=lfs merge=lfs -text
54
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/mobiles/youBot.ttm filter=lfs diff=lfs merge=lfs -text
55
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/Robotiq85.ttm filter=lfs diff=lfs merge=lfs -text
56
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/mobiles/turtlebot.ttm filter=lfs diff=lfs merge=lfs -text
57
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/PandaGripper.ttm filter=lfs diff=lfs merge=lfs -text
58
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/mobiles/LoCoBot.ttm filter=lfs diff=lfs merge=lfs -text
59
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/MicoHand.ttm filter=lfs diff=lfs merge=lfs -text
60
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/JacoHand.ttm filter=lfs diff=lfs merge=lfs -text
61
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/BaxterGripper.ttm filter=lfs diff=lfs merge=lfs -text
62
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/UR5.ttm filter=lfs diff=lfs merge=lfs -text
63
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/UR3.ttm filter=lfs diff=lfs merge=lfs -text
64
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/UR10.ttm filter=lfs diff=lfs merge=lfs -text
65
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/XArm7.ttm filter=lfs diff=lfs merge=lfs -text
66
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Sawyer.ttm filter=lfs diff=lfs merge=lfs -text
67
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Panda.ttm filter=lfs diff=lfs merge=lfs -text
68
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Mico.ttm filter=lfs diff=lfs merge=lfs -text
69
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/LBR_iiwa_14_R820.ttm filter=lfs diff=lfs merge=lfs -text
70
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/LBR_iiwa_7_R800.ttm filter=lfs diff=lfs merge=lfs -text
71
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Jaco.ttm filter=lfs diff=lfs merge=lfs -text
72
+ third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Dobot.ttm filter=lfs diff=lfs merge=lfs -text
73
+ third_party/3d_flowmatch_actor/fig/peract_hiveformer.jpg filter=lfs diff=lfs merge=lfs -text
74
+ third_party/3d_flowmatch_actor/fig/peract2.jpg filter=lfs diff=lfs merge=lfs -text
75
+ third_party/3d_flowmatch_actor/fig/diagram_gif.gif filter=lfs diff=lfs merge=lfs -text
76
+ third_party/AnyBimanual/third_party/PyRep/build/temp.linux-x86_64-cpython-311/build/pyrep/backend/pyrep/backend/_sim_cffi.o filter=lfs diff=lfs merge=lfs -text
77
+ third_party/AnyBimanual/third_party/PyRep/build/lib.linux-x86_64-cpython-311/pyrep/backend/_sim_cffi.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
78
+ third_party/AnyBimanual/docs/logo.png filter=lfs diff=lfs merge=lfs -text
79
+ third_party/AnyBimanual/docs/pipeline.png filter=lfs diff=lfs merge=lfs -text
80
+ third_party/AnyBimanual/anybimanual_real_supply/data/pick_in_two_keyframe/episode0/front_rgb/1.png filter=lfs diff=lfs merge=lfs -text
81
+ third_party/AnyBimanual/anybimanual_real_supply/data/pick_in_two_keyframe/episode0/front_rgb/2.png filter=lfs diff=lfs merge=lfs -text
82
+ third_party/AnyBimanual/anybimanual_real_supply/data/pick_in_two_keyframe/episode0/front_rgb/0.png filter=lfs diff=lfs merge=lfs -text
83
+ third_party/DexGarmentLab/Data/Hang_Coat_Validation_HALO/final_state_pic/img_1.png filter=lfs diff=lfs merge=lfs -text
84
+ third_party/DexGarmentLab/Data/Hang_Coat_Validation_HALO/final_state_pic/img_0.png filter=lfs diff=lfs merge=lfs -text
85
+ third_party/AnyBimanual/third_party/pytorch3d/tests/pulsar/reference/examples_TestRenderer_test_smallopt.png filter=lfs diff=lfs merge=lfs -text
86
+ third_party/AnyBimanual/third_party/pytorch3d/tests/pulsar/reference/examples_TestRenderer_test_cam.png filter=lfs diff=lfs merge=lfs -text
87
+ third_party/AnyBimanual/third_party/pytorch3d/tests/data/test_cow_image_rectangle_MeshRasterizerOpenGL.png filter=lfs diff=lfs merge=lfs -text
88
+ third_party/AnyBimanual/third_party/pytorch3d/tests/data/cow.glb filter=lfs diff=lfs merge=lfs -text
89
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/transforms_overview.jpg filter=lfs diff=lfs merge=lfs -text
90
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/world_camera_image.jpg filter=lfs diff=lfs merge=lfs -text
91
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/p3d_vs_softras.png filter=lfs diff=lfs merge=lfs -text
92
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/p3d_naive_vs_coarse.png filter=lfs diff=lfs merge=lfs -text
93
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/meshrcnn.png filter=lfs diff=lfs merge=lfs -text
94
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/iou3d.gif filter=lfs diff=lfs merge=lfs -text
95
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/batch_intro.png filter=lfs diff=lfs merge=lfs -text
96
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/batch_modes.gif filter=lfs diff=lfs merge=lfs -text
97
+ third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/architecture_renderer.jpg filter=lfs diff=lfs merge=lfs -text
98
+ third_party/AnyBimanual/third_party/pytorch3d/.github/shapenet_render.png filter=lfs diff=lfs merge=lfs -text
99
+ third_party/AnyBimanual/third_party/pytorch3d/.github/render_textured_mesh.gif filter=lfs diff=lfs merge=lfs -text
100
+ third_party/AnyBimanual/third_party/pytorch3d/.github/nerf_project_logo.gif filter=lfs diff=lfs merge=lfs -text
101
+ third_party/AnyBimanual/third_party/pytorch3d/.github/fit_nerf.gif filter=lfs diff=lfs merge=lfs -text
102
+ third_party/AnyBimanual/third_party/pytorch3d/.github/fit_textured_volume.gif filter=lfs diff=lfs merge=lfs -text
103
+ third_party/AnyBimanual/third_party/pytorch3d/.github/implicitron_config.gif filter=lfs diff=lfs merge=lfs -text
104
+ third_party/AnyBimanual/third_party/pytorch3d/.github/densepose_render.png filter=lfs diff=lfs merge=lfs -text
105
+ third_party/AnyBimanual/third_party/pytorch3d/.github/dolphin_deform.gif filter=lfs diff=lfs merge=lfs -text
106
+ third_party/AnyBimanual/third_party/pytorch3d/.github/cow_deform.gif filter=lfs diff=lfs merge=lfs -text
107
+ third_party/AnyBimanual/third_party/pytorch3d/.github/camera_position_teapot.gif filter=lfs diff=lfs merge=lfs -text
108
+ third_party/AnyBimanual/third_party/pytorch3d/.github/bundle_adjust.gif filter=lfs diff=lfs merge=lfs -text
109
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/waypoints_added.png filter=lfs diff=lfs merge=lfs -text
110
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/w0_ext_string.png filter=lfs diff=lfs merge=lfs -text
111
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/translate_cuboid_labeled.png filter=lfs diff=lfs merge=lfs -text
112
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/task_design_empty_labeled.png filter=lfs diff=lfs merge=lfs -text
113
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/task_design_empty.png filter=lfs diff=lfs merge=lfs -text
114
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/spawn_boundary.png filter=lfs diff=lfs merge=lfs -text
115
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/set_dummy_orient_labeled.png filter=lfs diff=lfs merge=lfs -text
116
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/scaling_factors_labeled.png filter=lfs diff=lfs merge=lfs -text
117
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/save_builder.gif filter=lfs diff=lfs merge=lfs -text
118
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/slide_block_to_target.gif filter=lfs diff=lfs merge=lfs -text
119
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/rotate_wall.png filter=lfs diff=lfs merge=lfs -text
120
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/pos_relto_parent.png filter=lfs diff=lfs merge=lfs -text
121
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/make_grey.png filter=lfs diff=lfs merge=lfs -text
122
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/large_container_scene.png filter=lfs diff=lfs merge=lfs -text
123
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/just_boxes.png filter=lfs diff=lfs merge=lfs -text
124
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/grouping.gif filter=lfs diff=lfs merge=lfs -text
125
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/invert_vis_layer.gif filter=lfs diff=lfs merge=lfs -text
126
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/cuboid_plane.png filter=lfs diff=lfs merge=lfs -text
127
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/empty_container.gif filter=lfs diff=lfs merge=lfs -text
128
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/container_wall.png filter=lfs diff=lfs merge=lfs -text
129
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/change_proxsense_size_labeled.png filter=lfs diff=lfs merge=lfs -text
130
+ third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/1base_1wall.png filter=lfs diff=lfs merge=lfs -text
131
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_design_bimanual.ttt filter=lfs diff=lfs merge=lfs -text
132
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_design.ttt filter=lfs diff=lfs merge=lfs -text
133
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/weighing_scales.ttm filter=lfs diff=lfs merge=lfs -text
134
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/tv_on.ttm filter=lfs diff=lfs merge=lfs -text
135
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/water_plants.ttm filter=lfs diff=lfs merge=lfs -text
136
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/unplug_charger.ttm filter=lfs diff=lfs merge=lfs -text
137
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/turn_oven_on.ttm filter=lfs diff=lfs merge=lfs -text
138
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/turn_tap.ttm filter=lfs diff=lfs merge=lfs -text
139
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_usb_out_of_computer.ttm filter=lfs diff=lfs merge=lfs -text
140
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_tray_out_of_oven.ttm filter=lfs diff=lfs merge=lfs -text
141
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_umbrella_out_of_umbrella_stand.ttm filter=lfs diff=lfs merge=lfs -text
142
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_lid_off_saucepan.ttm filter=lfs diff=lfs merge=lfs -text
143
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_shoes_out_of_box.ttm filter=lfs diff=lfs merge=lfs -text
144
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_money_out_safe.ttm filter=lfs diff=lfs merge=lfs -text
145
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_off_weighing_scales.ttm filter=lfs diff=lfs merge=lfs -text
146
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/sweep_to_dustpan.ttm filter=lfs diff=lfs merge=lfs -text
147
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/sweep_to_dustpan_of_size.ttm filter=lfs diff=lfs merge=lfs -text
148
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_frame_off_hanger.ttm filter=lfs diff=lfs merge=lfs -text
149
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/straighten_rope.ttm filter=lfs diff=lfs merge=lfs -text
150
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/stack_wine.ttm filter=lfs diff=lfs merge=lfs -text
151
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/stack_cups.ttm filter=lfs diff=lfs merge=lfs -text
152
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/stack_chairs.ttm filter=lfs diff=lfs merge=lfs -text
153
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_cup_out_from_cabinet.ttm filter=lfs diff=lfs merge=lfs -text
154
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/solve_puzzle.ttm filter=lfs diff=lfs merge=lfs -text
155
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/setup_chess.ttm filter=lfs diff=lfs merge=lfs -text
156
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/slide_cabinet_open_and_place_cups.ttm filter=lfs diff=lfs merge=lfs -text
157
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/setup_checkers.ttm filter=lfs diff=lfs merge=lfs -text
158
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/slide_cabinet_open.ttm filter=lfs diff=lfs merge=lfs -text
159
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/set_the_table.ttm filter=lfs diff=lfs merge=lfs -text
160
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/set_clock_to_time.ttm filter=lfs diff=lfs merge=lfs -text
161
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/scoop_with_spatula.ttm filter=lfs diff=lfs merge=lfs -text
162
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/remove_cups.ttm filter=lfs diff=lfs merge=lfs -text
163
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_tray_in_oven.ttm filter=lfs diff=lfs merge=lfs -text
164
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_umbrella_in_umbrella_stand.ttm filter=lfs diff=lfs merge=lfs -text
165
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_rubbish_in_color_bin.ttm filter=lfs diff=lfs merge=lfs -text
166
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_rubbish_in_bin.ttm filter=lfs diff=lfs merge=lfs -text
167
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_shoes_in_box.ttm filter=lfs diff=lfs merge=lfs -text
168
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_money_in_safe.ttm filter=lfs diff=lfs merge=lfs -text
169
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_knife_on_chopping_board.ttm filter=lfs diff=lfs merge=lfs -text
170
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_knife_in_knife_block.ttm filter=lfs diff=lfs merge=lfs -text
171
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_groceries_in_cupboard.ttm filter=lfs diff=lfs merge=lfs -text
172
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_bottle_in_fridge.ttm filter=lfs diff=lfs merge=lfs -text
173
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_all_groceries_in_cupboard.ttm filter=lfs diff=lfs merge=lfs -text
174
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/pour_from_cup_to_cup.ttm filter=lfs diff=lfs merge=lfs -text
175
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/press_switch.ttm filter=lfs diff=lfs merge=lfs -text
176
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_books_at_shelf_location.ttm filter=lfs diff=lfs merge=lfs -text
177
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_books_on_bookshelf.ttm filter=lfs diff=lfs merge=lfs -text
178
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/plug_charger_in_power_supply.ttm filter=lfs diff=lfs merge=lfs -text
179
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/play_jenga.ttm filter=lfs diff=lfs merge=lfs -text
180
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/place_wine_at_rack_location.ttm filter=lfs diff=lfs merge=lfs -text
181
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/place_shape_in_shape_sorter.ttm filter=lfs diff=lfs merge=lfs -text
182
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/place_hanger_on_rack.ttm filter=lfs diff=lfs merge=lfs -text
183
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/pick_and_lift_small.ttm filter=lfs diff=lfs merge=lfs -text
184
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/pick_up_cup.ttm filter=lfs diff=lfs merge=lfs -text
185
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/pick_and_lift.ttm filter=lfs diff=lfs merge=lfs -text
186
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/place_cups.ttm filter=lfs diff=lfs merge=lfs -text
187
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/phone_on_base.ttm filter=lfs diff=lfs merge=lfs -text
188
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_wine_bottle.ttm filter=lfs diff=lfs merge=lfs -text
189
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_washing_machine.ttm filter=lfs diff=lfs merge=lfs -text
190
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_oven.ttm filter=lfs diff=lfs merge=lfs -text
191
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_microwave.ttm filter=lfs diff=lfs merge=lfs -text
192
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_grill.ttm filter=lfs diff=lfs merge=lfs -text
193
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_fridge.ttm filter=lfs diff=lfs merge=lfs -text
194
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_window.ttm filter=lfs diff=lfs merge=lfs -text
195
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_jar.ttm filter=lfs diff=lfs merge=lfs -text
196
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_door.ttm filter=lfs diff=lfs merge=lfs -text
197
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/move_hanger.ttm filter=lfs diff=lfs merge=lfs -text
198
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_box.ttm filter=lfs diff=lfs merge=lfs -text
199
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/light_bulb_out.ttm filter=lfs diff=lfs merge=lfs -text
200
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/light_bulb_in.ttm filter=lfs diff=lfs merge=lfs -text
201
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/meat_on_grill.ttm filter=lfs diff=lfs merge=lfs -text
202
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/meat_off_grill.ttm filter=lfs diff=lfs merge=lfs -text
203
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/lamp_off.ttm filter=lfs diff=lfs merge=lfs -text
204
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/lamp_on.ttm filter=lfs diff=lfs merge=lfs -text
205
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/insert_usb_in_computer.ttm filter=lfs diff=lfs merge=lfs -text
206
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/hit_ball_with_queue.ttm filter=lfs diff=lfs merge=lfs -text
207
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/hang_frame_on_hanger.ttm filter=lfs diff=lfs merge=lfs -text
208
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/hockey.ttm filter=lfs diff=lfs merge=lfs -text
209
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/empty_dishwasher.ttm filter=lfs diff=lfs merge=lfs -text
210
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/get_ice_from_fridge.ttm filter=lfs diff=lfs merge=lfs -text
211
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/cut_vegetables.ttm filter=lfs diff=lfs merge=lfs -text
212
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_take_tray_out_of_oven.ttm filter=lfs diff=lfs merge=lfs -text
213
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_put_bottle_in_fridge.ttm filter=lfs diff=lfs merge=lfs -text
214
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_take_shoes_out_of_box.ttm filter=lfs diff=lfs merge=lfs -text
215
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_lift_stick.ttm filter=lfs diff=lfs merge=lfs -text
216
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_microwave.ttm filter=lfs diff=lfs merge=lfs -text
217
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_close_jar.ttm filter=lfs diff=lfs merge=lfs -text
218
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_laptop_lid.ttm filter=lfs diff=lfs merge=lfs -text
219
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_jar.ttm filter=lfs diff=lfs merge=lfs -text
220
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_grill.ttm filter=lfs diff=lfs merge=lfs -text
221
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_fridge.ttm filter=lfs diff=lfs merge=lfs -text
222
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_door.ttm filter=lfs diff=lfs merge=lfs -text
223
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_box.ttm filter=lfs diff=lfs merge=lfs -text
224
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/change_clock.ttm filter=lfs diff=lfs merge=lfs -text
225
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/change_channel.ttm filter=lfs diff=lfs merge=lfs -text
226
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_sweep_to_dustpan.ttm filter=lfs diff=lfs merge=lfs -text
227
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_straighten_rope.ttm filter=lfs diff=lfs merge=lfs -text
228
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_set_the_table.ttm filter=lfs diff=lfs merge=lfs -text
229
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_pick_plate.ttm filter=lfs diff=lfs merge=lfs -text
230
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_pick_laptop.ttm filter=lfs diff=lfs merge=lfs -text
231
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/beat_the_buzz.ttm filter=lfs diff=lfs merge=lfs -text
232
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/basketball_in_hoop.ttm filter=lfs diff=lfs merge=lfs -text
233
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_take_tray_out_of_oven.ttm filter=lfs diff=lfs merge=lfs -text
234
+ third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/ur5.ttm filter=lfs diff=lfs merge=lfs -text
235
+ third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/sawyer.ttm filter=lfs diff=lfs merge=lfs -text
236
+ third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_take_shoes_out_of_box.ttm filter=lfs diff=lfs merge=lfs -text
237
+ third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/panda.ttm filter=lfs diff=lfs merge=lfs -text
238
+ third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/mico.ttm filter=lfs diff=lfs merge=lfs -text
239
+ third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/jaco.ttm filter=lfs diff=lfs merge=lfs -text
240
+ third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/dual_panda.ttm filter=lfs diff=lfs merge=lfs -text
241
+ third_party/AnyBimanual/third_party/RLBench/rlbench/assets/knife_block.ttm filter=lfs diff=lfs merge=lfs -text
242
+ third_party/AnyBimanual/third_party/RLBench/rlbench/assets/frying_pan.ttm filter=lfs diff=lfs merge=lfs -text
243
+ third_party/AnyBimanual/third_party/RLBench/rlbench/assets/dishwasher.ttm filter=lfs diff=lfs merge=lfs -text
244
+ third_party/AnyBimanual/third_party/RLBench/rlbench/assets/chopping_board.ttm filter=lfs diff=lfs merge=lfs -text
245
+ third_party/AnyBimanual/third_party/RLBench/rlbench/assets/door.ttm filter=lfs diff=lfs merge=lfs -text
246
+ third_party/AnyBimanual/third_party/RLBench/rlbench/assets/carrot.ttm filter=lfs diff=lfs merge=lfs -text
247
+ third_party/AnyBimanual/third_party/RLBench/rlbench/assets/banana.ttm filter=lfs diff=lfs merge=lfs -text
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Created by pytest automatically.
2
+ *
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/CACHEDIR.TAG ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ Signature: 8a477f597d28d172789f06886806bc55
2
+ # This file is a cache directory tag created by pytest.
3
+ # For information about cache directory tags, see:
4
+ # https://bford.info/cachedir/spec.html
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/README.md ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # pytest cache directory #
2
+
3
+ This directory contains data from the pytest's cache plugin,
4
+ which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
5
+
6
+ **Do not** commit this to version control.
7
+
8
+ See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/v/cache/lastfailed ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/v/cache/nodeids ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ "tests/test_eval_toggle_paths_work.py::test_eval_toggle_paths_work",
3
+ "tests/test_general_eval_protocol_is_identical.py::test_general_eval_protocol_is_identical_across_modes",
4
+ "tests/test_public_benchmark_package_summary.py::test_public_benchmark_package_detects_training_mismatch",
5
+ "tests/test_public_benchmark_package_summary.py::test_public_benchmark_package_summary_passes_with_clear_gain",
6
+ "tests/test_public_benchmark_package_tracks.py::test_public_anchor_protocol_identity_is_mode_invariant",
7
+ "tests/test_public_benchmark_package_tracks.py::test_public_benchmark_package_contains_expected_tracks",
8
+ "tests/test_public_benchmark_package_tracks.py::test_public_target_protocol_identity_is_mode_invariant",
9
+ "tests/test_public_benchmark_package_tracks.py::test_public_track_roles_are_partitioned",
10
+ "tests/test_public_benchmark_package_tracks.py::test_training_fairness_signature_matches_for_trunk_and_adapter"
11
+ ]
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/artifacts/generated_configs/public_benchmark_package_v1.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "anchor_eval_modes": [
3
+ "trunk_only",
4
+ "adapter_noop",
5
+ "adapter_active"
6
+ ],
7
+ "anchor_track_ids": [],
8
+ "defaults": {
9
+ "anchor_episodes": 25,
10
+ "resolution": 256,
11
+ "target_test_episodes": 100,
12
+ "target_train_demos": 64,
13
+ "target_val_demos": 16
14
+ },
15
+ "package_name": "public_hybrid_bimanual_benchmark_v1",
16
+ "target_eval_modes": [
17
+ "trunk_only_ft",
18
+ "adapter_noop",
19
+ "adapter_active_ft"
20
+ ],
21
+ "target_track_ids": [
22
+ "rlbench2_put_bottle_in_fridge",
23
+ "rlbench2_take_out_tray",
24
+ "rlbench2_take_shoes_out_of_box",
25
+ "rlbench2_lift_tray",
26
+ "rlbench2_straighten_rope",
27
+ "rlbench2_sweep_to_dustpan",
28
+ "dexgarmentlab_store_tops",
29
+ "dexgarmentlab_fold_tops",
30
+ "dexgarmentlab_hang_coat"
31
+ ],
32
+ "thresholds": {
33
+ "anchor_tolerance": 0.02,
34
+ "sign_of_life_intervention_rate": 0.15,
35
+ "sign_of_life_non_base_selection_rate": 0.15,
36
+ "sign_of_life_success_gain": 0.05
37
+ },
38
+ "tracks": [
39
+ {
40
+ "action_space": "bimanual_pose_then_gripper",
41
+ "benchmark_task": "bimanual_put_bottle_in_fridge",
42
+ "default_cameras": [
43
+ "front",
44
+ "wrist_left",
45
+ "wrist_right"
46
+ ],
47
+ "notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
48
+ "observation_stack": "rgbd_3cam",
49
+ "public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
50
+ "role": "target",
51
+ "success_metric": "success_rate",
52
+ "suite": "rlbench2",
53
+ "target_behavior": "grasp and place a bottle into a constrained refrigerator cavity",
54
+ "task_family": "containment_manipulation",
55
+ "track_id": "rlbench2_put_bottle_in_fridge"
56
+ },
57
+ {
58
+ "action_space": "bimanual_pose_then_gripper",
59
+ "benchmark_task": "bimanual_take_tray_out_of_oven",
60
+ "default_cameras": [
61
+ "front",
62
+ "wrist_left",
63
+ "wrist_right"
64
+ ],
65
+ "notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
66
+ "observation_stack": "rgbd_3cam",
67
+ "public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
68
+ "role": "target",
69
+ "success_metric": "success_rate",
70
+ "suite": "rlbench2",
71
+ "target_behavior": "extract a tray from an enclosed appliance with coordinated bimanual motion",
72
+ "task_family": "tray_extraction",
73
+ "track_id": "rlbench2_take_out_tray"
74
+ },
75
+ {
76
+ "action_space": "bimanual_pose_then_gripper",
77
+ "benchmark_task": "bimanual_take_shoes_out_of_box",
78
+ "default_cameras": [
79
+ "front",
80
+ "wrist_left",
81
+ "wrist_right"
82
+ ],
83
+ "notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 shoe-box extraction task.",
84
+ "observation_stack": "rgbd_3cam",
85
+ "public_source": "https://bimanual.github.io/",
86
+ "role": "target",
87
+ "success_metric": "success_rate",
88
+ "suite": "rlbench2",
89
+ "target_behavior": "open a shoe box and remove both shoes with coordinated bimanual manipulation",
90
+ "task_family": "container_extraction",
91
+ "track_id": "rlbench2_take_shoes_out_of_box"
92
+ },
93
+ {
94
+ "action_space": "bimanual_pose_then_gripper",
95
+ "benchmark_task": "bimanual_lift_tray",
96
+ "default_cameras": [
97
+ "front",
98
+ "wrist_left",
99
+ "wrist_right"
100
+ ],
101
+ "notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
102
+ "observation_stack": "rgbd_3cam",
103
+ "public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
104
+ "role": "target",
105
+ "success_metric": "success_rate",
106
+ "suite": "rlbench2",
107
+ "target_behavior": "coordinate both arms to lift a tray while maintaining grasp consistency",
108
+ "task_family": "tray_lifting",
109
+ "track_id": "rlbench2_lift_tray"
110
+ },
111
+ {
112
+ "action_space": "bimanual_pose_then_gripper",
113
+ "benchmark_task": "bimanual_straighten_rope",
114
+ "default_cameras": [
115
+ "front",
116
+ "wrist_left",
117
+ "wrist_right"
118
+ ],
119
+ "notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
120
+ "observation_stack": "rgbd_3cam",
121
+ "public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
122
+ "role": "target",
123
+ "success_metric": "success_rate",
124
+ "suite": "rlbench2",
125
+ "target_behavior": "coordinate both arms to manipulate and straighten a rope",
126
+ "task_family": "deformable_linear_manipulation",
127
+ "track_id": "rlbench2_straighten_rope"
128
+ },
129
+ {
130
+ "action_space": "bimanual_pose_then_gripper",
131
+ "benchmark_task": "bimanual_sweep_to_dustpan",
132
+ "default_cameras": [
133
+ "front",
134
+ "wrist_left",
135
+ "wrist_right"
136
+ ],
137
+ "notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
138
+ "observation_stack": "rgbd_3cam",
139
+ "public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
140
+ "role": "target",
141
+ "success_metric": "success_rate",
142
+ "suite": "rlbench2",
143
+ "target_behavior": "coordinate a sweeping motion that moves debris toward a dustpan region",
144
+ "task_family": "tool_use_sweeping",
145
+ "track_id": "rlbench2_sweep_to_dustpan"
146
+ },
147
+ {
148
+ "action_space": "bimanual_ik_hand_state",
149
+ "benchmark_task": "Store Tops",
150
+ "default_cameras": [
151
+ "env_camera",
152
+ "garment_camera",
153
+ "object_camera"
154
+ ],
155
+ "notes": "Smoke runner executes the shipped scripted GAM-driven policy headlessly and checks for nonzero point-cloud and action-derived quantities.",
156
+ "observation_stack": "rgbd_pointcloud_3cam",
157
+ "public_source": "https://github.com/wayrise/DexGarmentLab",
158
+ "role": "target",
159
+ "success_metric": "success_rate",
160
+ "suite": "dexgarmentlab",
161
+ "target_behavior": "lift and place a top into the target storage region",
162
+ "task_family": "garment_storage",
163
+ "track_id": "dexgarmentlab_store_tops"
164
+ },
165
+ {
166
+ "action_space": "bimanual_ik_hand_state",
167
+ "benchmark_task": "Fold Tops",
168
+ "default_cameras": [
169
+ "env_camera",
170
+ "garment_camera",
171
+ "object_camera"
172
+ ],
173
+ "notes": "Smoke runner executes the shipped scripted GAM-driven policy headlessly and checks for nonzero point-cloud and action-derived quantities.",
174
+ "observation_stack": "rgbd_pointcloud_3cam",
175
+ "public_source": "https://github.com/wayrise/DexGarmentLab",
176
+ "role": "target",
177
+ "success_metric": "success_rate",
178
+ "suite": "dexgarmentlab",
179
+ "target_behavior": "execute a multi-stage fold of a top garment with bimanual dexterous control",
180
+ "task_family": "garment_folding",
181
+ "track_id": "dexgarmentlab_fold_tops"
182
+ },
183
+ {
184
+ "action_space": "bimanual_ik_hand_state",
185
+ "benchmark_task": "Hang Coat",
186
+ "default_cameras": [
187
+ "env_camera",
188
+ "garment_camera",
189
+ "object_camera"
190
+ ],
191
+ "notes": "Smoke runner executes the shipped scripted GAM-driven policy headlessly and checks for nonzero point-cloud and action-derived quantities.",
192
+ "observation_stack": "rgbd_pointcloud_3cam",
193
+ "public_source": "https://github.com/wayrise/DexGarmentLab",
194
+ "role": "target",
195
+ "success_metric": "success_rate",
196
+ "suite": "dexgarmentlab",
197
+ "target_behavior": "lift and place a coat onto the hanging fixture with bimanual coordination",
198
+ "task_family": "garment_hanging",
199
+ "track_id": "dexgarmentlab_hang_coat"
200
+ }
201
+ ]
202
+ }
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # reveal_vla_bimanual
2
+
3
+ Simulation-first prototype for a language-conditioned bimanual reveal-and-retrieve policy under elastic occlusion.
4
+
5
+ This repo is not a generalist VLA backbone in the RT-2 / OpenVLA / Octo sense. The current contribution is the reveal-state machinery layered on top of a frozen vision-language encoder.
6
+
7
+ This repo is structured around five top-level modules:
8
+
9
+ - `sim_rlbench/`: RLBench2 / PerAct2 wrappers, dataset hooks, camera setup, and benchmark evaluation helpers.
10
+ - `sim_reveal/`: reveal-proxy environments, scripted teachers, and privileged label extraction.
11
+ - `models/`: shared backbone wrappers, multi-view fusion, bimanual decoder, reveal-state head, world model, and planner.
12
+ - `train/`: trainers, losses, checkpointing, and Hydra/YAML configs.
13
+ - `eval/`: benchmark scripts, ablations, metrics, plots, and report generation.
14
+
15
+ Current bootstrap priorities:
16
+
17
+ 1. Reproduce the RLBench2 / PerAct2 stack with a fixed 3-camera interface.
18
+ 2. Stand up a backbone-only 3-camera policy in the same training/eval harness.
19
+ 3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
20
+
21
+ ## Public benchmark package
22
+
23
+ The repo now includes a concrete public-benchmark package definition for the next-stage fair comparison:
24
+
25
+ - `eval/public_benchmark_package.py`
26
+ - track registry for bag, dense occluded retrieval, cloth retrieval, and the generic anchor
27
+ - same-protocol signatures across `trunk_only`, `adapter_noop`, and `adapter_active`
28
+ - same-data / same-init fairness signatures for `trunk_only_ft` vs `adapter_active_ft`
29
+
30
+ - `eval/run_public_benchmark_package.py`
31
+ - validates normalized result files from multiple public suites
32
+ - checks protocol identity and training fairness
33
+ - aggregates per-track gains, sign-of-life diagnostics, and anchor regressions
34
+
35
+ Write the default manifest to `~/workspace` with:
36
+
37
+ ```bash
38
+ python -m eval.run_public_benchmark_package \
39
+ --write-default-manifest ~/workspace/public_benchmark_package_v1.json
40
+ ```
41
+
42
+ Summarize normalized result files with:
43
+
44
+ ```bash
45
+ python -m eval.run_public_benchmark_package \
46
+ --result /abs/path/result_a.json \
47
+ --result /abs/path/result_b.json \
48
+ --output-dir ~/workspace/reports/public_benchmark_package_v1
49
+ ```
50
+
51
+ Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
52
+
53
+ ## RLBench env A
54
+
55
+ The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
56
+
57
+ Bring it up with:
58
+
59
+ ```bash
60
+ /workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
61
+ /workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
62
+ /workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
63
+ ```
64
+
65
+ Verify GPU GL on the headless display:
66
+
67
+ ```bash
68
+ DISPLAY=:99 glxinfo -B
69
+ ```
70
+
71
+ Run the RLBench launch/reset/step smoke test:
72
+
73
+ ```bash
74
+ env \
75
+ DISPLAY=:99 \
76
+ XDG_RUNTIME_DIR=/tmp/runtime-root \
77
+ COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
78
+ LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
79
+ QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
80
+ /workspace/.tools/micromamba/bin/micromamba run \
81
+ -r /workspace/.micromamba \
82
+ -p /workspace/envs/rlbench \
83
+ python -m sim_rlbench.launch_smoke --headless
84
+ ```
85
+
86
+ The working benchmark interface is fixed to three cameras only:
87
+
88
+ - `front`
89
+ - `wrist_left`
90
+ - `wrist_right`
91
+
92
+ The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack.
93
+
94
+ Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
95
+
96
+ ```bash
97
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
98
+ python -m sim_rlbench.smoke_test --print-train-command
99
+ ```
100
+
101
+ Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
102
+
103
+ ```bash
104
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
105
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train
106
+ ```
107
+
108
+ If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`:
109
+
110
+ ```bash
111
+ apt-get install -y squashfs-tools
112
+ micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
113
+ python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
114
+ ```
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adapter Stack
2
+
3
+ This repo now contains a no-op-safe `trunk + adapter` path alongside the legacy monolithic policies.
4
+
5
+ ## Main classes
6
+
7
+ - `models/policy.py`
8
+ - `FoundationTrunkPolicy`
9
+ - `ElasticOcclusionAdapter`
10
+ - `AdapterWrappedPolicy`
11
+
12
+ - `models/backbones.py`
13
+ - `NoOpAdapterCompatibleTrunkOutput`
14
+ - `TrunkInterface`
15
+
16
+ - `models/action_decoder.py`
17
+ - `TaskRoutedProposalPrior`
18
+
19
+ - `models/planner.py`
20
+ - `ElasticFeasibilityGate`
21
+ - `ResidualActionReranker`
22
+ - `AdapterPlanner`
23
+
24
+ - `models/world_model.py`
25
+ - `LightweightRevealStateTransitionModel`
26
+
27
+ - `models/observation_memory.py`
28
+ - `RevealStateCache`
29
+
30
+ ## Trainer modes
31
+
32
+ `train/trainer.py` now supports:
33
+
34
+ - `policy_type: adapter_wrapped`
35
+ - `policy_type: foundation_trunk`
36
+
37
+ Relevant trainer fields:
38
+
39
+ - `training_regime`
40
+ - `eval_mode`
41
+ - `adapter_mode`
42
+ - `adapter_use_transition_model`
43
+ - `adapter_use_task_conditioning`
44
+
45
+ ## Guardrail tests
46
+
47
+ New tests:
48
+
49
+ - `tests/test_trunk_noop_equivalence.py`
50
+ - `tests/test_adapter_gate_blocks_unsafe_retrieve.py`
51
+ - `tests/test_task_specific_loss_masking.py`
52
+ - `tests/test_cloth_specific_metrics_affect_selection.py`
53
+ - `tests/test_general_eval_protocol_is_identical.py`
54
+
55
+ ## Config templates
56
+
57
+ - `train/configs/proxy_adapter_wrapped_clip_base.yaml`
58
+ - `train/configs/proxy_adapter_wrapped_clip_rank_only.yaml`
59
+ - `train/configs/proxy_adapter_wrapped_clip_noop_eval.yaml`
60
+
61
+ ## Benchmark wrappers
62
+
63
+ - `scripts/run_anchor_adapter_ablations.sh`
64
+ - `scripts/run_proxy_adapter_ablations.sh`
65
+ - `scripts/run_target_like_adapter_subset.sh`
66
+ - `eval/public_benchmark_package.py`
67
+ - `eval/run_public_benchmark_package.py`
68
+
69
+ All new configs and scripts default to `~/workspace` outputs and reports.
70
+
71
+ ## Public benchmark package
72
+
73
+ The public benchmark package is the current fair-comparison contract for real benchmarks:
74
+
75
+ - target tracks:
76
+ - `bag_track` -> `BEHAVIOR-1K/unpacking_childs_bag-0`
77
+ - `occlusion_track` -> `ManiSkill/PickClutterYCB-v1`
78
+ - `cloth_track` -> `GarmentLab/grasp_protocol_stacked_garment`
79
+ - anchor track:
80
+ - `anchor_track` -> `AnyBimanual/dual_push_buttons`
81
+
82
+ The package code enforces:
83
+
84
+ - mode-invariant eval protocols per track
85
+ - same-data / same-init fairness for `trunk_only_ft` vs `adapter_active_ft`
86
+ - sign-of-life thresholds on intervention and non-base proposal selection
87
+ - no-regression tolerance on the trusted generic anchor
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/public_benchmark_package.md ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Public Benchmark Package
2
+
3
+ This repo now contains the requested hybrid public-benchmark package for the real-sim phase.
4
+
5
+ ## Tracks
6
+
7
+ - `rlbench2_put_bottle_in_fridge`
8
+ - suite: `RLBench2`
9
+ - task: `bimanual_put_bottle_in_fridge`
10
+ - `rlbench2_take_out_tray`
11
+ - suite: `RLBench2`
12
+ - task: `bimanual_take_tray_out_of_oven`
13
+ - `rlbench2_take_shoes_out_of_box`
14
+ - suite: `RLBench2`
15
+ - task: `bimanual_take_shoes_out_of_box`
16
+ - `rlbench2_lift_tray`
17
+ - suite: `RLBench2`
18
+ - task: `bimanual_lift_tray`
19
+ - `rlbench2_straighten_rope`
20
+ - suite: `RLBench2`
21
+ - task: `bimanual_straighten_rope`
22
+ - `rlbench2_sweep_to_dustpan`
23
+ - suite: `RLBench2`
24
+ - task: `bimanual_sweep_to_dustpan`
25
+ - `dexgarmentlab_store_tops`
26
+ - suite: `DexGarmentLab`
27
+ - task: `Store Tops`
28
+ - `dexgarmentlab_fold_tops`
29
+ - suite: `DexGarmentLab`
30
+ - task: `Fold Tops`
31
+ - `dexgarmentlab_hang_coat`
32
+ - suite: `DexGarmentLab`
33
+ - task: `Hang Coat`
34
+
35
+ ## Enforced fairness
36
+
37
+ - `trunk_only_ft` and `adapter_active_ft` must share:
38
+ - train demos
39
+ - val demos
40
+ - init checkpoint group
41
+ - optimizer
42
+ - LR schedule
43
+ - batch size
44
+ - augmentations
45
+ - early stopping metric
46
+ - max gradient steps
47
+ - unfrozen trunk scope
48
+ - dataset split id
49
+ - all modes on a track must share the same eval protocol signature
50
+ - the current hybrid battery has no dedicated anchor track; `anchor_pass` is vacuously `true`
51
+
52
+ ## Normalized result schema
53
+
54
+ Each external benchmark run should be converted to one JSON object with:
55
+
56
+ - `track_id`
57
+ - `adapter_mode`
58
+ - `successes` or `success_rate`
59
+ - `episodes`
60
+ - `seed`
61
+ - `eval_protocol`
62
+ - for target tracks: `train_spec`
63
+ - optional diagnostics:
64
+ - `intervention_rate`
65
+ - `non_base_selection_rate`
66
+ - `steps_to_first_reveal_or_access`
67
+ - `steps_to_retrieve`
68
+ - `disturbance_proxy`
69
+
70
+ ## Commands
71
+
72
+ Write the default manifest:
73
+
74
+ ```bash
75
+ python -m eval.run_public_benchmark_package \
76
+ --write-default-manifest /workspace/public_hybrid_benchmark_v1.json
77
+ ```
78
+
79
+ Run the full smoke battery:
80
+
81
+ ```bash
82
+ python -m eval.run_hybrid_public_benchmark_smoke \
83
+ --output-dir /workspace/reports/public_hybrid_benchmark_smoke_v1
84
+ ```
85
+
86
+ Run a suite-specific smoke battery:
87
+
88
+ ```bash
89
+ xvfb-run -a -s "-screen 0 1280x1024x24" \
90
+ /workspace/envs/rlbench/bin/python -m eval.run_rlbench_hybrid_smoke
91
+
92
+ /workspace/envs/isaacsim/bin/python -m eval.run_dexgarmentlab_hybrid_smoke
93
+ ```
94
+
95
+ Summarize normalized multi-mode results:
96
+
97
+ ```bash
98
+ python -m eval.run_public_benchmark_package \
99
+ --result /abs/path/rlbench2_put_bottle_in_fridge_adapter_active_seed17.json \
100
+ --result /abs/path/dexgarmentlab_hang_coat_trunk_seed17.json \
101
+ --output-dir /workspace/reports/public_hybrid_benchmark_v1
102
+ ```
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_hybrid_public_benchmark_smoke.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import os
6
+ import subprocess
7
+ import sys
8
+ from pathlib import Path
9
+ from typing import Any
10
+
11
+
12
+ REPO_CODE_ROOT = Path(__file__).resolve().parents[1]
13
+ WORKSPACE_ROOT = Path("/workspace")
14
+ DEFAULT_REPORT_DIR = WORKSPACE_ROOT / "reports" / "public_hybrid_benchmark_smoke_v1"
15
+ DEFAULT_RLBENCH_PYTHON = WORKSPACE_ROOT / "envs" / "rlbench" / "bin" / "python"
16
+ DEFAULT_DEX_PYTHON = WORKSPACE_ROOT / "envs" / "isaacsim" / "bin" / "python"
17
+ DEFAULT_COPPELIASIM_ROOT = WORKSPACE_ROOT / "assets" / "coppeliasim_v4_1_0"
18
+
19
+
20
+ def _run(command: list[str], *, env: dict[str, str]) -> subprocess.CompletedProcess[str]:
21
+ return subprocess.run(command, capture_output=True, text=True, check=False, env=env)
22
+
23
+
24
+ def _summary_path(suite_output_dir: Path, filename: str) -> Path:
25
+ return suite_output_dir / filename
26
+
27
+
28
+ def _load_json(path: Path) -> dict[str, Any]:
29
+ with path.open("r", encoding="utf-8") as handle:
30
+ return json.load(handle)
31
+
32
+
33
+ def _parse_args() -> argparse.Namespace:
34
+ parser = argparse.ArgumentParser(description="Run the full hybrid public benchmark smoke battery.")
35
+ parser.add_argument("--output-dir", type=Path, default=DEFAULT_REPORT_DIR)
36
+ parser.add_argument("--adapter-mode", type=str, default="trunk_only_ft")
37
+ parser.add_argument("--seed", type=int, default=17)
38
+ parser.add_argument("--rlbench-python", type=Path, default=DEFAULT_RLBENCH_PYTHON)
39
+ parser.add_argument("--dex-python", type=Path, default=DEFAULT_DEX_PYTHON)
40
+ parser.add_argument("--skip-rlbench", action="store_true")
41
+ parser.add_argument("--skip-dex", action="store_true")
42
+ return parser.parse_args()
43
+
44
+
45
+ def main() -> None:
46
+ args = _parse_args()
47
+ args.output_dir.mkdir(parents=True, exist_ok=True)
48
+
49
+ env = os.environ.copy()
50
+ env["PYTHONPATH"] = f"{REPO_CODE_ROOT}:{env.get('PYTHONPATH', '')}".rstrip(":")
51
+
52
+ suite_summaries: dict[str, Any] = {}
53
+ failures: list[str] = []
54
+
55
+ if not args.skip_rlbench:
56
+ rlbench_out = args.output_dir / "rlbench2"
57
+ rlbench_out.mkdir(parents=True, exist_ok=True)
58
+ rlbench_env = dict(env)
59
+ rlbench_env["COPPELIASIM_ROOT"] = rlbench_env.get("COPPELIASIM_ROOT", str(DEFAULT_COPPELIASIM_ROOT))
60
+ rlbench_env["LD_LIBRARY_PATH"] = (
61
+ f"{rlbench_env['COPPELIASIM_ROOT']}:{rlbench_env.get('LD_LIBRARY_PATH', '')}"
62
+ ).rstrip(":")
63
+ rlbench_env["QT_QPA_PLATFORM_PLUGIN_PATH"] = rlbench_env.get(
64
+ "QT_QPA_PLATFORM_PLUGIN_PATH",
65
+ rlbench_env["COPPELIASIM_ROOT"],
66
+ )
67
+ rlbench_env["QT_PLUGIN_PATH"] = rlbench_env.get("QT_PLUGIN_PATH", rlbench_env["COPPELIASIM_ROOT"])
68
+ rlbench_env["XDG_RUNTIME_DIR"] = rlbench_env.get("XDG_RUNTIME_DIR", "/tmp/runtime-root")
69
+ rlbench_proc = _run(
70
+ [
71
+ "xvfb-run",
72
+ "-a",
73
+ "-s",
74
+ "-screen 0 1280x1024x24",
75
+ str(args.rlbench_python),
76
+ "-m",
77
+ "eval.run_rlbench_hybrid_smoke",
78
+ "--output-dir",
79
+ str(rlbench_out),
80
+ "--adapter-mode",
81
+ args.adapter_mode,
82
+ "--seed",
83
+ str(args.seed),
84
+ ],
85
+ env=rlbench_env,
86
+ )
87
+ if rlbench_proc.returncode != 0:
88
+ failures.append("rlbench2")
89
+ suite_summaries["rlbench2"] = {
90
+ "returncode": rlbench_proc.returncode,
91
+ "stdout_tail": rlbench_proc.stdout.splitlines()[-20:],
92
+ "stderr_tail": rlbench_proc.stderr.splitlines()[-20:],
93
+ "summary_path": str(_summary_path(rlbench_out, "rlbench_hybrid_smoke_summary.json")),
94
+ }
95
+
96
+ if not args.skip_dex:
97
+ dex_out = args.output_dir / "dexgarmentlab"
98
+ dex_out.mkdir(parents=True, exist_ok=True)
99
+ dex_proc = _run(
100
+ [
101
+ str(args.dex_python),
102
+ "-m",
103
+ "eval.run_dexgarmentlab_hybrid_smoke",
104
+ "--output-dir",
105
+ str(dex_out),
106
+ "--adapter-mode",
107
+ args.adapter_mode,
108
+ "--seed",
109
+ str(args.seed),
110
+ ],
111
+ env=env,
112
+ )
113
+ if dex_proc.returncode != 0:
114
+ failures.append("dexgarmentlab")
115
+ suite_summaries["dexgarmentlab"] = {
116
+ "returncode": dex_proc.returncode,
117
+ "stdout_tail": dex_proc.stdout.splitlines()[-20:],
118
+ "stderr_tail": dex_proc.stderr.splitlines()[-20:],
119
+ "summary_path": str(_summary_path(dex_out, "dexgarmentlab_hybrid_smoke_summary.json")),
120
+ }
121
+
122
+ summary_payload = {
123
+ "adapter_mode": args.adapter_mode,
124
+ "seed": args.seed,
125
+ "suite_summaries": suite_summaries,
126
+ "failures": failures,
127
+ }
128
+
129
+ for suite_name, payload in list(suite_summaries.items()):
130
+ summary_path = Path(payload["summary_path"])
131
+ if summary_path.exists():
132
+ summary_payload["suite_summaries"][suite_name]["summary"] = _load_json(summary_path)
133
+
134
+ output_path = args.output_dir / "hybrid_public_benchmark_smoke_summary.json"
135
+ output_path.write_text(json.dumps(summary_payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
136
+ print(json.dumps({"summary_path": str(output_path), "failures": failures}, indent=2))
137
+ if failures:
138
+ raise SystemExit(1)
139
+
140
+
141
+ if __name__ == "__main__":
142
+ main()
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py ADDED
@@ -0,0 +1,887 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import torch
6
+ from torch import Tensor, nn
7
+
8
+
9
+ @dataclass
10
+ class PlannerConfig:
11
+ hidden_dim: int = 512
12
+ num_candidates: int = 8
13
+ action_dim: int = 14
14
+ num_support_modes: int = 3
15
+ utility_margin: float = 0.1
16
+ corridor_weight: float = 1.0
17
+ persistence_weight: float = 0.5
18
+ proposal_weight: float = 0.5
19
+ task_progress_weight: float = 0.75
20
+ disturbance_weight: float = 0.75
21
+ reocclusion_weight: float = 0.5
22
+ visibility_weight: float = 0.25
23
+ num_heads: int = 4
24
+ num_layers: int = 2
25
+ num_phases: int = 5
26
+ num_arm_roles: int = 4
27
+ top_k: int = 4
28
+ belief_gain_weight: float = 1.0
29
+ visibility_gain_weight: float = 0.75
30
+ clearance_weight: float = 0.75
31
+ occluder_contact_weight: float = 0.5
32
+ grasp_affordance_weight: float = 0.75
33
+ support_stability_weight: float = 0.5
34
+ residual_weight: float = 0.5
35
+ retrieve_access_threshold: float = 0.15
36
+ retrieve_persistence_threshold: float = 0.15
37
+ retrieve_support_threshold: float = 0.25
38
+ retrieve_reocclusion_threshold: float = 0.6
39
+ adapter_confidence_threshold: float = 0.55
40
+ mode_preference_bonus: float = 3.0
41
+ premature_retrieve_penalty: float = 1.5
42
+ premature_insert_penalty: float = 0.75
43
+ premature_occlusion_sweep_penalty: float = 0.75
44
+ premature_maintain_penalty: float = 0.0
45
+ retrieve_stage_access_threshold: float = 0.45
46
+ retrieve_stage_reveal_threshold: float = 0.40
47
+ retrieve_stage_persistence_threshold: float = 0.20
48
+ retrieve_stage_support_threshold: float = 0.25
49
+ insert_stage_access_threshold: float = 0.40
50
+ insert_stage_visibility_threshold: float = 0.30
51
+ insert_stage_support_threshold: float = 0.25
52
+ occlusion_maintain_gap_min_access: float = 0.0
53
+ occlusion_maintain_gap_min_visibility: float = 0.0
54
+
55
+
56
+ class RevealPlanner(nn.Module):
57
+ def __init__(self, config: PlannerConfig) -> None:
58
+ super().__init__()
59
+ self.config = config
60
+ summary_dim = (
61
+ config.action_dim * 2
62
+ + 3
63
+ + 3
64
+ + 1
65
+ + 3
66
+ + 1
67
+ )
68
+ self.trunk = nn.Sequential(
69
+ nn.LayerNorm(summary_dim),
70
+ nn.Linear(summary_dim, config.hidden_dim),
71
+ nn.GELU(),
72
+ nn.Linear(config.hidden_dim, config.hidden_dim),
73
+ nn.GELU(),
74
+ )
75
+ self.success_head = nn.Linear(config.hidden_dim, 1)
76
+ self.risk_head = nn.Linear(config.hidden_dim, 1)
77
+
78
+ def summarize_candidates(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> Tensor:
79
+ candidate_mean = candidate_chunks.mean(dim=2)
80
+ candidate_terminal = candidate_chunks[:, :, -1]
81
+ corridor_prob = rollout_state["corridor_logits"].sigmoid().amax(dim=-1).mean(dim=-2)
82
+ persistence = rollout_state["persistence_horizon"].mean(dim=-2)
83
+ disturbance = rollout_state["disturbance_cost"].mean(dim=-1, keepdim=True)
84
+ reocclusion = rollout_state["reocclusion_logit"].sigmoid().mean(dim=-2)
85
+ uncertainty = rollout_state["uncertainty"].mean(dim=-1, keepdim=True)
86
+ return torch.cat(
87
+ [
88
+ candidate_mean,
89
+ candidate_terminal,
90
+ corridor_prob,
91
+ persistence,
92
+ disturbance,
93
+ reocclusion,
94
+ uncertainty,
95
+ ],
96
+ dim=-1,
97
+ )
98
+
99
+ def score_rollouts(self, rollout_state: dict[str, Tensor], candidate_chunks: Tensor) -> dict[str, Tensor]:
100
+ features = self.summarize_candidates(candidate_chunks, rollout_state)
101
+ hidden = self.trunk(features)
102
+ success_logits = self.success_head(hidden).squeeze(-1)
103
+ risk_values = torch.sigmoid(self.risk_head(hidden)).squeeze(-1)
104
+ utility_scores = success_logits.sigmoid() - risk_values
105
+ return {
106
+ "planner_features": features,
107
+ "planner_hidden": hidden,
108
+ "success_logits": success_logits,
109
+ "risk_values": risk_values,
110
+ "utility_scores": utility_scores,
111
+ }
112
+
113
+ def select_best(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> dict[str, Tensor]:
114
+ outputs = self.score_rollouts(rollout_state=rollout_state, candidate_chunks=candidate_chunks)
115
+ best_idx = outputs["utility_scores"].argmax(dim=-1)
116
+ batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
117
+ return {
118
+ **outputs,
119
+ "best_indices": best_idx,
120
+ "best_chunk": candidate_chunks[batch_indices, best_idx],
121
+ }
122
+
123
+
124
class InteractionPlanner(nn.Module):
    """Transformer-based candidate scorer: encodes per-step rollout summaries
    as a sequence, pools a learned CLS token, and scores each candidate with
    success / risk / score heads.
    """

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        self.config = config
        # Per-step feature width: action + phase probs + both arms' role probs
        # (flattened) + support-mode probs + 7 extra channels produced by
        # summarize_trajectory (five mean-field summaries + role gap).
        step_dim = (
            config.action_dim
            + config.num_phases
            + (2 * config.num_arm_roles)
            + config.num_support_modes
            + 7
        )
        self.step_proj = nn.Sequential(
            nn.LayerNorm(step_dim),
            nn.Linear(step_dim, config.hidden_dim),
            nn.GELU(),
        )
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_dim,
            nhead=config.num_heads,
            dim_feedforward=config.hidden_dim * 4,
            batch_first=True,
            norm_first=True,
        )
        self.sequence_encoder = nn.TransformerEncoder(encoder_layer, num_layers=config.num_layers)
        # Learned CLS token prepended to every candidate's step sequence.
        self.cls_token = nn.Parameter(torch.randn(1, 1, config.hidden_dim) * 0.02)
        self.success_head = nn.Linear(config.hidden_dim, 1)
        self.risk_head = nn.Linear(config.hidden_dim, 1)
        self.score_head = nn.Linear(config.hidden_dim, 1)

    def _mean_field(self, tensor: Tensor) -> Tensor:
        # Average away the last two axes (presumably spatial — TODO confirm
        # the field layout upstream).
        return tensor.mean(dim=(-1, -2))

    def summarize_trajectory(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> Tensor:
        """Build per-step features over the common horizon of chunk and rollout."""
        # Clip both sources to the shared horizon so steps align.
        horizon = min(candidate_chunks.shape[2], rollout_state["phase_logits"].shape[2])
        candidate_steps = candidate_chunks[:, :, :horizon]
        phase_probs = rollout_state["phase_logits"][:, :, :horizon].softmax(dim=-1)
        support_probs = rollout_state["support_mode_logits"][:, :, :horizon].softmax(dim=-1)
        # Flatten the trailing (arm, role) axes into one probability vector per step.
        arm_role_probs = rollout_state["arm_role_logits"][:, :, :horizon].softmax(dim=-1).flatten(start_dim=-2)
        target_mean = self._mean_field(rollout_state["target_field"][:, :, :horizon].sigmoid())
        feasibility_mean = self._mean_field(rollout_state["actor_feasibility_field"][:, :, :horizon].sigmoid())
        persistence_mean = self._mean_field(rollout_state["persistence_field"][:, :, :horizon])
        risk_mean = self._mean_field(rollout_state["risk_field"][:, :, :horizon])
        uncertainty_mean = self._mean_field(rollout_state["uncertainty_field"][:, :, :horizon])
        # Absolute gap between the two arms' role distributions, averaged per step.
        role_gap = (
            rollout_state["arm_role_logits"][:, :, :horizon, 0].softmax(dim=-1)
            - rollout_state["arm_role_logits"][:, :, :horizon, 1].softmax(dim=-1)
        ).abs().mean(dim=-1, keepdim=True)
        # NOTE(review): torch.cat along dim=-1 requires every term to share the
        # rank of candidate_steps. The five _mean_field terms must therefore
        # retain a trailing channel axis after the (-1, -2) mean and jointly
        # contribute the 6 channels implied by step_dim's "+ 7" (role_gap adds
        # the 7th) — confirm against the field shapes produced upstream.
        return torch.cat(
            [
                candidate_steps,
                phase_probs,
                arm_role_probs,
                support_probs,
                target_mean,
                feasibility_mean,
                persistence_mean,
                risk_mean,
                uncertainty_mean,
                role_gap,
            ],
            dim=-1,
        )

    def score_rollouts(
        self,
        rollout_state: dict[str, Tensor],
        candidate_chunks: Tensor,
        proposal_logits: Tensor | None = None,
    ) -> dict[str, Tensor]:
        """Encode each candidate's step sequence and produce per-candidate scores."""
        features = self.summarize_trajectory(candidate_chunks, rollout_state)
        batch_size, num_candidates, horizon, _ = features.shape
        # Fold (batch, candidate) together so the encoder sees one sequence per candidate.
        flat_features = features.view(batch_size * num_candidates, horizon, -1)
        hidden_steps = self.step_proj(flat_features)
        cls = self.cls_token.expand(batch_size * num_candidates, -1, -1)
        encoded = self.sequence_encoder(torch.cat([cls, hidden_steps], dim=1))
        # CLS position (index 0) pools the whole sequence.
        pooled = encoded[:, 0]
        # NOTE(review): the trailing .squeeze(-1) after .view(batch, candidates)
        # is a no-op unless num_candidates == 1, where it collapses the
        # candidate axis to shape (batch,) — confirm callers tolerate that.
        success_logits = self.success_head(pooled).view(batch_size, num_candidates).squeeze(-1)
        risk_values = torch.sigmoid(self.risk_head(pooled)).view(batch_size, num_candidates).squeeze(-1)
        utility_scores = self.score_head(pooled).view(batch_size, num_candidates).squeeze(-1)
        utility_scores = utility_scores + success_logits.sigmoid() - risk_values
        if proposal_logits is not None and proposal_logits.shape == utility_scores.shape:
            # Shape-compatible proposal confidences add a weighted sigmoid bonus.
            utility_scores = utility_scores + self.config.proposal_weight * proposal_logits.sigmoid()
        return {
            "planner_features": features.mean(dim=2),
            "planner_hidden": pooled.view(batch_size, num_candidates, -1),
            "success_logits": success_logits,
            "risk_values": risk_values,
            "utility_scores": utility_scores,
        }

    def select_best(
        self,
        candidate_chunks: Tensor,
        rollout_state: dict[str, Tensor],
        proposal_logits: Tensor | None = None,
    ) -> dict[str, Tensor]:
        """Score candidates and return the scores plus the argmax-utility chunk."""
        outputs = self.score_rollouts(
            rollout_state=rollout_state,
            candidate_chunks=candidate_chunks,
            proposal_logits=proposal_logits,
        )
        best_idx = outputs["utility_scores"].argmax(dim=-1)
        batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
        return {
            **outputs,
            "best_indices": best_idx,
            "best_chunk": candidate_chunks[batch_indices, best_idx],
        }
232
+
233
+
234
+ class StructuredElasticUtility(nn.Module):
235
+ def __init__(self, config: PlannerConfig) -> None:
236
+ super().__init__()
237
+ self.config = config
238
+
239
+ def _field_mean(self, tensor: Tensor) -> Tensor:
240
+ if tensor.ndim == 6:
241
+ return tensor.mean(dim=(-1, -2, -3))
242
+ if tensor.ndim == 5:
243
+ return tensor.mean(dim=(-1, -2))
244
+ if tensor.ndim == 4:
245
+ return tensor.mean(dim=(-1, -2))
246
+ return tensor
247
+
248
+ def _initial_scalar(self, state: dict[str, Tensor], key: str) -> Tensor:
249
+ value = state[key]
250
+ if value.ndim >= 4:
251
+ return value.mean(dim=tuple(range(1, value.ndim)))
252
+ if value.ndim == 3:
253
+ return value.mean(dim=(-1, -2))
254
+ if value.ndim == 2:
255
+ return value.mean(dim=-1)
256
+ return value
257
+
258
+ def forward(
259
+ self,
260
+ initial_state: dict[str, Tensor],
261
+ rollout_state: dict[str, Tensor],
262
+ candidate_chunks: Tensor,
263
+ ) -> dict[str, Tensor]:
264
+ initial_belief = self._initial_scalar(initial_state, "target_belief_field").unsqueeze(1)
265
+ initial_visibility = self._initial_scalar(initial_state, "visibility_field").unsqueeze(1)
266
+ belief_future = self._field_mean(rollout_state["target_belief_field"]).mean(dim=-1)
267
+ visibility_future = self._field_mean(rollout_state["visibility_field"]).mean(dim=-1)
268
+ clearance = self._field_mean(rollout_state["clearance_field"]).mean(dim=-1)
269
+ occluder_contact = self._field_mean(rollout_state["occluder_contact_field"]).mean(dim=-1)
270
+ grasp_affordance = self._field_mean(rollout_state["grasp_affordance_field"]).mean(dim=-1)
271
+ support_stability = torch.sigmoid(self._field_mean(rollout_state["support_stability_field"])).mean(dim=-1)
272
+ persistence_traj = self._field_mean(rollout_state["persistence_field"])
273
+ reocclusion_traj = self._field_mean(rollout_state["reocclusion_field"])
274
+ disturbance_traj = self._field_mean(rollout_state["disturbance_field"])
275
+ access_traj = torch.sigmoid(self._field_mean(rollout_state["access_field"]))
276
+ persistence = persistence_traj.mean(dim=-1)
277
+ reocclusion = reocclusion_traj.mean(dim=-1)
278
+ disturbance = disturbance_traj.mean(dim=-1)
279
+ access_quality = access_traj.mean(dim=-1)
280
+ access_floor = access_traj.amin(dim=-1)
281
+ persistence_floor = persistence_traj.amin(dim=-1)
282
+ support_floor = torch.sigmoid(self._field_mean(rollout_state["support_stability_field"])).amin(dim=-1)
283
+ reocclusion_worst = reocclusion_traj.amax(dim=-1)
284
+ retrieve_progress = torch.sigmoid(candidate_chunks[:, :, :, -1]).mean(dim=-1)
285
+ utility = (
286
+ self.config.belief_gain_weight * (belief_future - initial_belief)
287
+ + self.config.visibility_gain_weight * (visibility_future - initial_visibility)
288
+ + self.config.clearance_weight * clearance
289
+ + self.config.occluder_contact_weight * occluder_contact
290
+ + self.config.grasp_affordance_weight * grasp_affordance
291
+ + self.config.persistence_weight * persistence
292
+ + self.config.support_stability_weight * support_stability
293
+ + self.config.corridor_weight * access_quality
294
+ + self.config.task_progress_weight * retrieve_progress
295
+ - self.config.reocclusion_weight * reocclusion
296
+ - self.config.disturbance_weight * disturbance
297
+ - self.config.visibility_weight * (1.0 - visibility_future)
298
+ )
299
+ return {
300
+ "belief_gain": belief_future - initial_belief,
301
+ "visibility_gain": visibility_future - initial_visibility,
302
+ "clearance": clearance,
303
+ "occluder_contact_quality": occluder_contact,
304
+ "grasp_affordance": grasp_affordance,
305
+ "persistence": persistence,
306
+ "support_stability": support_stability,
307
+ "reocclusion_penalty": reocclusion,
308
+ "reocclusion_worst": reocclusion_worst,
309
+ "disturbance_penalty": disturbance,
310
+ "access_quality": access_quality,
311
+ "access_floor": access_floor,
312
+ "persistence_floor": persistence_floor,
313
+ "support_floor": support_floor,
314
+ "task_progress": retrieve_progress,
315
+ "utility_structured": utility,
316
+ }
317
+
318
+
319
+ class ResidualPlannerScorer(nn.Module):
320
+ def __init__(self, config: PlannerConfig) -> None:
321
+ super().__init__()
322
+ feature_dim = (config.action_dim * 2) + 11
323
+ self.trunk = nn.Sequential(
324
+ nn.LayerNorm(feature_dim),
325
+ nn.Linear(feature_dim, config.hidden_dim),
326
+ nn.GELU(),
327
+ nn.Linear(config.hidden_dim, config.hidden_dim),
328
+ nn.GELU(),
329
+ )
330
+ self.success_head = nn.Linear(config.hidden_dim, 1)
331
+ self.risk_head = nn.Linear(config.hidden_dim, 1)
332
+ self.residual_head = nn.Linear(config.hidden_dim, 1)
333
+
334
+ def forward(
335
+ self,
336
+ candidate_chunks: Tensor,
337
+ structured: dict[str, Tensor],
338
+ proposal_logits: Tensor | None = None,
339
+ ) -> dict[str, Tensor]:
340
+ candidate_mean = candidate_chunks.mean(dim=2)
341
+ candidate_terminal = candidate_chunks[:, :, -1]
342
+ components = torch.stack(
343
+ [
344
+ structured["belief_gain"],
345
+ structured["visibility_gain"],
346
+ structured["clearance"],
347
+ structured["occluder_contact_quality"],
348
+ structured["grasp_affordance"],
349
+ structured["persistence"],
350
+ structured["support_stability"],
351
+ structured["reocclusion_penalty"],
352
+ structured["disturbance_penalty"],
353
+ structured["access_quality"],
354
+ structured["task_progress"],
355
+ ],
356
+ dim=-1,
357
+ )
358
+ features = torch.cat([candidate_mean, candidate_terminal, components], dim=-1)
359
+ hidden = self.trunk(features)
360
+ success_logits = self.success_head(hidden).squeeze(-1)
361
+ risk_values = torch.sigmoid(self.risk_head(hidden)).squeeze(-1)
362
+ residual = self.residual_head(hidden).squeeze(-1)
363
+ if proposal_logits is not None and proposal_logits.shape == residual.shape:
364
+ residual = residual + 0.25 * proposal_logits.sigmoid()
365
+ return {
366
+ "planner_hidden": hidden,
367
+ "success_logits": success_logits,
368
+ "risk_values": risk_values,
369
+ "utility_residual": residual,
370
+ }
371
+
372
+
373
class CascadePlanner(nn.Module):
    """Two-stage planner: a cheap mode-diverse shortlist over proposals
    followed by a structured-utility + learned-residual scoring pass with a
    feasibility penalty for premature retrieve-like candidates.
    """

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        self.config = config
        self.structured = StructuredElasticUtility(config)
        self.residual = ResidualPlannerScorer(config)

    def shortlist(
        self,
        proposal_logits: Tensor | None,
        candidate_chunks: Tensor,
        proposal_mode_assignments: Tensor | None = None,
    ) -> Tensor:
        """Pick the top-k candidate indices per batch element.

        Without mode assignments this is a plain top-k over the cheap scores.
        With mode assignments, the best candidate of each mode is taken first
        (mode-diverse shortlist), then remaining slots are filled in global
        score order.
        """
        batch_size, num_candidates = candidate_chunks.shape[:2]
        top_k = min(max(1, self.config.top_k), num_candidates)
        if proposal_logits is None:
            # Fallback cheap score: prefer low-magnitude (low-energy) chunks.
            cheap_scores = -candidate_chunks.square().mean(dim=(-1, -2))
        else:
            cheap_scores = proposal_logits
        if proposal_mode_assignments is None:
            return cheap_scores.topk(top_k, dim=-1).indices
        if proposal_mode_assignments.ndim == 1:
            # Broadcast shared assignments across the batch.
            proposal_mode_assignments = proposal_mode_assignments.unsqueeze(0).expand(batch_size, -1)

        shortlisted = []
        for batch_idx in range(batch_size):
            scores = cheap_scores[batch_idx]
            mode_ids = proposal_mode_assignments[batch_idx]
            # Best-scoring candidate within each mode.
            mode_best: list[tuple[float, int]] = []
            for mode_id in torch.unique(mode_ids):
                mode_indices = torch.nonzero(mode_ids == mode_id, as_tuple=False).squeeze(-1)
                best_local = mode_indices[scores[mode_indices].argmax()]
                mode_best.append((float(scores[best_local].detach()), int(best_local)))
            mode_best.sort(key=lambda item: item[0], reverse=True)
            chosen = [index for _, index in mode_best[:top_k]]
            if len(chosen) < top_k:
                # Fill remaining slots with the globally best unchosen candidates.
                for candidate_idx in scores.argsort(descending=True).tolist():
                    if candidate_idx not in chosen:
                        chosen.append(candidate_idx)
                    if len(chosen) >= top_k:
                        break
            shortlisted.append(torch.as_tensor(chosen[:top_k], device=candidate_chunks.device, dtype=torch.long))
        return torch.stack(shortlisted, dim=0)

    def select_best(
        self,
        initial_state: dict[str, Tensor],
        candidate_chunks: Tensor,
        rollout_state: dict[str, Tensor],
        proposal_logits: Tensor | None = None,
        candidate_indices: Tensor | None = None,
        proposal_mode_names: list[list[str]] | None = None,
    ) -> dict[str, Tensor]:
        """Score candidates and pick the argmax-utility chunk.

        Retrieve-like candidates (mode name containing "retrieve",
        "insert_actor" or "probe_inside") receive a fixed penalty when the
        elastic feasibility floors are violated. The floors use the shared
        config thresholds (previously hard-coded 0.15/0.15/0.25/0.6) so this
        stays consistent with ElasticFeasibilityGate; defaults are unchanged.

        When ``candidate_indices`` is given (shortlisted subset), the returned
        "best_indices" are mapped back to the original candidate numbering.
        """
        structured = self.structured(
            initial_state=initial_state,
            rollout_state=rollout_state,
            candidate_chunks=candidate_chunks,
        )
        residual = self.residual(
            candidate_chunks=candidate_chunks,
            structured=structured,
            proposal_logits=proposal_logits,
        )
        utility_total = structured["utility_structured"] + self.config.residual_weight * residual["utility_residual"]
        utility_total = utility_total + residual["success_logits"].sigmoid() - residual["risk_values"]
        feasibility_penalty = torch.zeros_like(utility_total)
        if proposal_mode_names is not None:
            retrieve_like = torch.zeros_like(utility_total, dtype=torch.bool)
            for batch_idx, names in enumerate(proposal_mode_names):
                for candidate_idx, name in enumerate(names[: utility_total.shape[1]]):
                    retrieve_like[batch_idx, candidate_idx] = any(
                        token in name for token in ("retrieve", "insert_actor", "probe_inside")
                    )
            # Shared feasibility floors from the config (same thresholds as the gate).
            blocked = (
                (structured["access_floor"] < self.config.retrieve_access_threshold)
                | (structured["persistence_floor"] < self.config.retrieve_persistence_threshold)
                | (structured["support_floor"] < self.config.retrieve_support_threshold)
                | (structured["reocclusion_worst"] > self.config.retrieve_reocclusion_threshold)
            )
            feasibility_penalty = retrieve_like.to(dtype=utility_total.dtype) * blocked.to(dtype=utility_total.dtype) * 2.0
            utility_total = utility_total - feasibility_penalty
        best_local = utility_total.argmax(dim=-1)
        batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
        if candidate_indices is None:
            best_indices = best_local
        else:
            best_indices = candidate_indices[batch_indices, best_local]
        return {
            **structured,
            **residual,
            "utility_total": utility_total,
            "utility_scores": utility_total,
            "feasibility_penalty": feasibility_penalty,
            "best_indices": best_indices,
            "best_chunk": candidate_chunks[batch_indices, best_local],
            "ranking_diagnostics": {
                "topk_indices": candidate_indices if candidate_indices is not None else best_local.unsqueeze(-1),
                "best_local_indices": best_local,
            },
        }
473
+
474
+
475
+ def _summary_scalar(state: dict[str, Tensor], key: str, fallback_keys: tuple[str, ...] = ()) -> Tensor:
476
+ for candidate in (key, *fallback_keys):
477
+ value = state.get(candidate)
478
+ if value is None:
479
+ continue
480
+ if value.ndim >= 5:
481
+ return value.mean(dim=tuple(range(value.ndim - 2, value.ndim))).mean(dim=-1)
482
+ if value.ndim == 4:
483
+ return value.mean(dim=(-1, -2))
484
+ if value.ndim == 3:
485
+ return value
486
+ if value.ndim == 2:
487
+ return value
488
+ return value.unsqueeze(-1)
489
+ raise KeyError(f"Missing summary key {key} and fallbacks {fallback_keys}.")
490
+
491
+
492
def _optional_summary_scalar(
    state: dict[str, Tensor],
    key: str,
    *,
    reference: Tensor,
    fallback_keys: tuple[str, ...] = (),
) -> Tensor:
    """Like ``_summary_scalar`` but tolerant of missing keys.

    Returns zeros shaped like ``reference`` when neither ``key`` nor any
    fallback is available; otherwise defers to ``_summary_scalar``.
    """
    if all(state.get(name) is None for name in (key, *fallback_keys)):
        return torch.zeros_like(reference)
    return _summary_scalar(state, key, fallback_keys)
503
+
504
+
505
class ElasticFeasibilityGate(nn.Module):
    """Parameter-free gate that disallows retrieve-like candidates whose
    rollout summaries violate the configured feasibility floors, and emits
    per-candidate diagnostics plus a soft confidence score.
    """

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        # retrieve_*_threshold values are read from this config at forward time.
        self.config = config

    def forward(
        self,
        *,
        rollout_state: dict[str, Tensor],
        proposal_mode_names: list[list[str]],
    ) -> dict[str, Tensor | list[list[dict[str, float | bool | str]]]]:
        """Evaluate feasibility floors per candidate.

        Args:
            rollout_state: summary tensors keyed "*_summary" (with legacy
                fallback keys), each reduced via _summary_scalar; the trailing
                axis is reduced with amin/amax — presumably time/steps, TODO
                confirm upstream.
            proposal_mode_names: per-sample candidate mode names; a candidate
                is "retrieve-like" when its name contains "retrieve",
                "insert_actor" or "probe_inside".

        Returns:
            Dict with allowed/blocked masks, scalar penalties, a soft
            adapter-confidence score, the per-candidate floor values, and a
            nested list of per-candidate reject diagnostics.
        """
        access = _summary_scalar(rollout_state, "access_summary", ("access_quality",))
        persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon"))
        support = _summary_scalar(rollout_state, "support_summary", ("support_stability",))
        reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",))
        disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",))
        # Worst-case statistics along the trailing axis: floors for the "good"
        # quantities, peaks for the "bad" ones.
        access_floor = access.amin(dim=-1)
        persistence_floor = persistence.amin(dim=-1)
        support_floor = support.amin(dim=-1)
        reocclusion_worst = reocclusion.amax(dim=-1)
        disturbance_worst = disturbance.amax(dim=-1)

        # A candidate is "blocked" when any configured floor/ceiling is violated.
        # This mask is computed for every candidate; only retrieve-like ones
        # are actually disallowed below.
        blocked = (
            (access_floor < self.config.retrieve_access_threshold)
            | (persistence_floor < self.config.retrieve_persistence_threshold)
            | (support_floor < self.config.retrieve_support_threshold)
            | (reocclusion_worst > self.config.retrieve_reocclusion_threshold)
        )
        penalties = blocked.to(dtype=access.dtype) * 2.0
        allowed_mask = torch.ones_like(access_floor, dtype=torch.bool)
        reject_diagnostics: list[list[dict[str, float | bool | str]]] = []
        for batch_idx, names in enumerate(proposal_mode_names):
            sample_records: list[dict[str, float | bool | str]] = []
            # Only as many names as there are candidates are inspected.
            for candidate_idx, name in enumerate(names[: access_floor.shape[1]]):
                retrieve_like = any(token in name for token in ("retrieve", "insert_actor", "probe_inside"))
                candidate_blocked = bool(retrieve_like and blocked[batch_idx, candidate_idx])
                if candidate_blocked:
                    allowed_mask[batch_idx, candidate_idx] = False
                # Diagnostic record of the floors that drove the decision.
                sample_records.append(
                    {
                        "mode_name": name,
                        "retrieve_like": retrieve_like,
                        "blocked": candidate_blocked,
                        "access_floor": float(access_floor[batch_idx, candidate_idx].detach()),
                        "persistence_floor": float(persistence_floor[batch_idx, candidate_idx].detach()),
                        "support_floor": float(support_floor[batch_idx, candidate_idx].detach()),
                        "reocclusion_worst": float(reocclusion_worst[batch_idx, candidate_idx].detach()),
                        "disturbance_worst": float(disturbance_worst[batch_idx, candidate_idx].detach()),
                    }
                )
            reject_diagnostics.append(sample_records)

        # Soft confidence: fixed-weight logistic blend of the mean summaries.
        confidence = torch.sigmoid(
            2.0 * access.mean(dim=-1)
            + 1.5 * persistence.mean(dim=-1)
            + 1.5 * support.mean(dim=-1)
            - 1.5 * reocclusion.mean(dim=-1)
            - disturbance.mean(dim=-1)
        )
        return {
            "allowed_mask": allowed_mask,
            "penalties": penalties,
            "blocked_mask": blocked,
            "adapter_confidence": confidence,
            "gate_access_floor": access_floor,
            "gate_persistence_floor": persistence_floor,
            "gate_support_floor": support_floor,
            "gate_reocclusion_worst": reocclusion_worst,
            "reject_diagnostics": reject_diagnostics,
        }
575
+
576
+
577
class ResidualActionReranker(nn.Module):
    """MLP reranker producing a residual score, a success logit and a risk
    value per candidate from chunk statistics and pooled rollout summaries.
    """

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        # Inputs: mean + terminal action (2 * action_dim) plus 8 pooled scalar
        # summaries (visibility, access, persistence, support, reocclusion,
        # disturbance, fold preservation, lift risk).
        feature_dim = (config.action_dim * 2) + 8
        self.network = nn.Sequential(
            nn.LayerNorm(feature_dim),
            nn.Linear(feature_dim, config.hidden_dim),
            nn.GELU(),
            nn.Linear(config.hidden_dim, config.hidden_dim),
            nn.GELU(),
        )
        self.score_head = nn.Linear(config.hidden_dim, 1)
        self.success_head = nn.Linear(config.hidden_dim, 1)
        self.risk_head = nn.Linear(config.hidden_dim, 1)

    def forward(
        self,
        *,
        candidate_chunks: Tensor,
        rollout_state: dict[str, Tensor],
        proposal_logits: Tensor | None,
    ) -> dict[str, Tensor]:
        """Score every candidate chunk.

        Args:
            candidate_chunks: assumed (batch, candidates, steps, action_dim)
                — TODO confirm against the caller.
            rollout_state: summary tensors read via _summary_scalar; the
                fold-preservation and lift-risk channels are optional and
                default to zeros when absent.
            proposal_logits: optional proposal scores; when shape-compatible
                with the residual, they add a small sigmoid bonus.

        Returns:
            Dict with "residual_scores", "planner_success_logits" and
            "planner_risk_values".
        """
        candidate_mean = candidate_chunks.mean(dim=2)
        candidate_terminal = candidate_chunks[:, :, -1]
        visibility = _summary_scalar(rollout_state, "visibility_summary", ("visibility_gain",))
        access = _summary_scalar(rollout_state, "access_summary", ("access_quality",))
        persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon"))
        support = _summary_scalar(rollout_state, "support_summary", ("support_stability",))
        reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",))
        disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",))
        # Optional channels: zero-filled (shaped like `visibility`) when missing.
        fold_preservation = _optional_summary_scalar(
            rollout_state,
            "fold_preservation_summary",
            reference=visibility,
            fallback_keys=("fold_preservation",),
        )
        lift_risk = _optional_summary_scalar(
            rollout_state,
            "lift_too_much_risk_summary",
            reference=visibility,
            fallback_keys=("lift_too_much_risk",),
        )
        # Pool each summary to one scalar channel per candidate.
        features = torch.cat(
            [
                candidate_mean,
                candidate_terminal,
                visibility.mean(dim=-1, keepdim=True),
                access.mean(dim=-1, keepdim=True),
                persistence.mean(dim=-1, keepdim=True),
                support.mean(dim=-1, keepdim=True),
                reocclusion.mean(dim=-1, keepdim=True),
                disturbance.mean(dim=-1, keepdim=True),
                fold_preservation.mean(dim=-1, keepdim=True),
                lift_risk.mean(dim=-1, keepdim=True),
            ],
            dim=-1,
        )
        hidden = self.network(features)
        residual = self.score_head(hidden).squeeze(-1)
        success = self.success_head(hidden).squeeze(-1)
        risk = torch.sigmoid(self.risk_head(hidden).squeeze(-1))
        if proposal_logits is not None and proposal_logits.shape == residual.shape:
            # Shape-compatible proposal confidences nudge the residual upward.
            residual = residual + 0.25 * proposal_logits.sigmoid()
        return {
            "residual_scores": residual,
            "planner_success_logits": success,
            "planner_risk_values": risk,
        }
645
+
646
+
647
+ class AdapterPlanner(nn.Module):
648
+ def __init__(self, config: PlannerConfig) -> None:
649
+ super().__init__()
650
+ self.config = config
651
+ self.gate = ElasticFeasibilityGate(config)
652
+ self.reranker = ResidualActionReranker(config)
653
+
654
+ def select_best(
655
+ self,
656
+ *,
657
+ candidate_chunks: Tensor,
658
+ rollout_state: dict[str, Tensor],
659
+ proposal_mode_names: list[list[str]],
660
+ proposal_logits: Tensor | None = None,
661
+ planning_mode: str = "adapter_active",
662
+ ) -> dict[str, Tensor | list[list[dict[str, float | bool | str]]]]:
663
+ batch_size = candidate_chunks.shape[0]
664
+ batch_indices = torch.arange(batch_size, device=candidate_chunks.device)
665
+ if planning_mode in {"identity", "trunk_only", "adapter_noop"}:
666
+ zero_scores = candidate_chunks.new_zeros((batch_size, candidate_chunks.shape[1]))
667
+ return {
668
+ "best_indices": torch.zeros(batch_size, dtype=torch.long, device=candidate_chunks.device),
669
+ "best_chunk": candidate_chunks[:, 0],
670
+ "utility_scores": zero_scores,
671
+ "utility_total": zero_scores,
672
+ "planner_success_logits": zero_scores,
673
+ "planner_risk_values": zero_scores,
674
+ "adapter_confidence": candidate_chunks.new_ones((batch_size, candidate_chunks.shape[1])),
675
+ "reject_diagnostics": [[] for _ in range(batch_size)],
676
+ "planning_mode": planning_mode,
677
+ }
678
+
679
+ gate_outputs = self.gate(rollout_state=rollout_state, proposal_mode_names=proposal_mode_names)
680
+ reranker = self.reranker(
681
+ candidate_chunks=candidate_chunks,
682
+ rollout_state=rollout_state,
683
+ proposal_logits=proposal_logits,
684
+ )
685
+ utility = reranker["residual_scores"] + reranker["planner_success_logits"].sigmoid() - reranker["planner_risk_values"]
686
+ visibility = _summary_scalar(rollout_state, "visibility_summary", ("visibility_gain",)).mean(dim=-1)
687
+ access = _summary_scalar(rollout_state, "access_summary", ("access_quality",)).mean(dim=-1)
688
+ persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon")).mean(dim=-1)
689
+ support = _summary_scalar(rollout_state, "support_summary", ("support_stability",)).mean(dim=-1)
690
+ reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",)).mean(dim=-1)
691
+ disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",)).mean(dim=-1)
692
+ fold_preservation = _optional_summary_scalar(
693
+ rollout_state,
694
+ "fold_preservation_summary",
695
+ reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)),
696
+ fallback_keys=("fold_preservation",),
697
+ ).mean(dim=-1)
698
+ mouth_aperture = _optional_summary_scalar(
699
+ rollout_state,
700
+ "mouth_aperture_summary",
701
+ reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)),
702
+ fallback_keys=("mouth_aperture",),
703
+ ).mean(dim=-1)
704
+ layer_separation = _optional_summary_scalar(
705
+ rollout_state,
706
+ "layer_separation_summary",
707
+ reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)),
708
+ fallback_keys=("layer_separation_quality",),
709
+ ).mean(dim=-1)
710
+ lift_risk = _optional_summary_scalar(
711
+ rollout_state,
712
+ "lift_too_much_risk_summary",
713
+ reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)),
714
+ fallback_keys=("lift_too_much_risk",),
715
+ ).mean(dim=-1)
716
+ mode_bias = utility.new_zeros(utility.shape)
717
+ stage_penalty = utility.new_zeros(utility.shape)
718
+ unresolved_reveal = (1.0 - visibility) + (1.0 - access)
719
+ stabilized_reveal = 0.5 * (access + persistence + support)
720
+ # Use optimistic scene readiness summaries for stage switching.
721
+ # Candidate-level safety is still enforced by the retrieve gate below, so
722
+ # we should not let one poor candidate keep the entire scene stuck in
723
+ # "reveal forever" mode when another candidate already makes retrieve feasible.
724
+ batch_visibility = visibility.amax(dim=1)
725
+ batch_access = access.amax(dim=1)
726
+ batch_persistence = persistence.amax(dim=1)
727
+ batch_support = support.amax(dim=1)
728
+ batch_reocclusion = reocclusion.amin(dim=1)
729
+ batch_disturbance = disturbance.amin(dim=1)
730
+ batch_fold = fold_preservation.amax(dim=1)
731
+ batch_mouth = mouth_aperture.amax(dim=1)
732
+ batch_layer = layer_separation.amax(dim=1)
733
+ batch_lift = lift_risk.amin(dim=1)
734
+ batch_reveal_readiness = torch.maximum(batch_visibility, batch_access)
735
+ for batch_idx, names in enumerate(proposal_mode_names):
736
+ is_bag = any(any(token in name for token in ("mouth", "rim", "probe_inside")) for name in names)
737
+ is_cloth = any(any(token in name for token in ("fold", "lift", "layer")) for name in names)
738
+ can_retrieve = (
739
+ batch_access[batch_idx] >= self.config.retrieve_stage_access_threshold
740
+ and batch_reveal_readiness[batch_idx] >= self.config.retrieve_stage_reveal_threshold
741
+ and batch_persistence[batch_idx] >= self.config.retrieve_stage_persistence_threshold
742
+ and batch_support[batch_idx] >= self.config.retrieve_stage_support_threshold
743
+ and batch_reocclusion[batch_idx] <= self.config.retrieve_reocclusion_threshold
744
+ )
745
+ if is_bag:
746
+ can_retrieve = bool(
747
+ can_retrieve
748
+ and batch_mouth[batch_idx] >= 0.30
749
+ and batch_persistence[batch_idx] >= 0.55
750
+ )
751
+ elif is_cloth:
752
+ can_retrieve = bool(
753
+ can_retrieve
754
+ and batch_layer[batch_idx] >= 0.18
755
+ and batch_fold[batch_idx] >= 0.60
756
+ and batch_lift[batch_idx] <= 0.30
757
+ and batch_support[batch_idx] >= 0.70
758
+ )
759
+ can_insert = (
760
+ batch_access[batch_idx] >= self.config.insert_stage_access_threshold
761
+ and batch_visibility[batch_idx] >= self.config.insert_stage_visibility_threshold
762
+ and batch_support[batch_idx] >= self.config.insert_stage_support_threshold
763
+ and batch_reocclusion[batch_idx] <= 0.65
764
+ )
765
+ maintain_ready = (
766
+ batch_access[batch_idx] >= self.config.occlusion_maintain_gap_min_access
767
+ and batch_visibility[batch_idx] >= self.config.occlusion_maintain_gap_min_visibility
768
+ )
769
+ if can_retrieve:
770
+ preferred_tokens = ("retrieve",)
771
+ elif can_insert:
772
+ preferred_tokens = ("probe_inside", "insert_actor") if is_bag else ("insert_actor",)
773
+ elif is_bag:
774
+ if batch_access[batch_idx] < 0.15 or batch_visibility[batch_idx] < 0.20:
775
+ preferred_tokens = ("widen_mouth", "maintain_mouth")
776
+ else:
777
+ preferred_tokens = ("maintain_mouth", "widen_mouth")
778
+ elif is_cloth:
779
+ if batch_access[batch_idx] < 0.15 or batch_visibility[batch_idx] < 0.20:
780
+ preferred_tokens = ("lift_edge", "separate_layer")
781
+ elif batch_lift[batch_idx] > 0.15 or batch_disturbance[batch_idx] > 0.25:
782
+ preferred_tokens = ("stabilize_fold", "maintain_lift")
783
+ else:
784
+ preferred_tokens = ("maintain_lift", "stabilize_fold")
785
+ else:
786
+ if not maintain_ready:
787
+ preferred_tokens = ("widen_gap", "pin_canopy", "sweep_left", "sweep_right")
788
+ elif batch_visibility[batch_idx] < 0.20 or batch_access[batch_idx] < 0.25:
789
+ preferred_tokens = ("widen_gap", "pin_canopy")
790
+ elif batch_disturbance[batch_idx] > 0.25 or batch_reocclusion[batch_idx] > 0.40:
791
+ preferred_tokens = ("maintain_gap", "pin_canopy")
792
+ else:
793
+ preferred_tokens = ("pin_canopy", "widen_gap")
794
+ for candidate_idx, name in enumerate(names[: utility.shape[1]]):
795
+ if name == "base_action":
796
+ continue
797
+ if any(token in name for token in ("retrieve",)):
798
+ bonus = (
799
+ 0.85 * visibility[batch_idx, candidate_idx]
800
+ + 0.85 * access[batch_idx, candidate_idx]
801
+ + 0.65 * persistence[batch_idx, candidate_idx]
802
+ + 0.50 * support[batch_idx, candidate_idx]
803
+ - 0.60 * reocclusion[batch_idx, candidate_idx]
804
+ - 0.25 * disturbance[batch_idx, candidate_idx]
805
+ )
806
+ elif any(token in name for token in ("insert_actor", "probe_inside")):
807
+ bonus = (
808
+ 0.70 * visibility[batch_idx, candidate_idx]
809
+ + 0.70 * access[batch_idx, candidate_idx]
810
+ + 0.35 * persistence[batch_idx, candidate_idx]
811
+ - 0.35 * reocclusion[batch_idx, candidate_idx]
812
+ - 0.15 * disturbance[batch_idx, candidate_idx]
813
+ )
814
+ elif any(token in name for token in ("maintain", "stabilize", "pin_canopy")):
815
+ bonus = (
816
+ 0.85 * stabilized_reveal[batch_idx, candidate_idx]
817
+ + 0.25 * visibility[batch_idx, candidate_idx]
818
+ - 0.20 * reocclusion[batch_idx, candidate_idx]
819
+ - 0.10 * disturbance[batch_idx, candidate_idx]
820
+ )
821
+ else:
822
+ bonus = (
823
+ 0.95 * unresolved_reveal[batch_idx, candidate_idx]
824
+ + 0.20 * (1.0 - persistence[batch_idx, candidate_idx])
825
+ - 0.10 * disturbance[batch_idx, candidate_idx]
826
+ )
827
+ if any(token in name for token in ("fold", "lift", "layer")):
828
+ bonus = bonus + 0.35 * fold_preservation[batch_idx, candidate_idx] - 0.35 * lift_risk[batch_idx, candidate_idx]
829
+ if any(token in name for token in preferred_tokens):
830
+ bonus = bonus + self.config.mode_preference_bonus
831
+ elif "retrieve" in name and not can_retrieve:
832
+ bonus = bonus - self.config.premature_retrieve_penalty
833
+ stage_penalty[batch_idx, candidate_idx] = (
834
+ stage_penalty[batch_idx, candidate_idx] + self.config.premature_retrieve_penalty
835
+ )
836
+ elif is_cloth and any(token in name for token in ("stabilize", "maintain")) and any(
837
+ token in preferred_tokens for token in ("lift_edge", "separate_layer")
838
+ ):
839
+ bonus = bonus - 1.0
840
+ stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.0
841
+ elif (not is_bag and not is_cloth) and any(token in name for token in ("sweep_left", "sweep_right")) and any(
842
+ token in preferred_tokens for token in ("pin_canopy", "widen_gap", "maintain_gap")
843
+ ):
844
+ bonus = bonus - self.config.premature_occlusion_sweep_penalty
845
+ elif any(token in name for token in ("probe_inside", "insert_actor", "retrieve")) and not can_insert:
846
+ bonus = bonus - self.config.premature_insert_penalty
847
+ stage_penalty[batch_idx, candidate_idx] = (
848
+ stage_penalty[batch_idx, candidate_idx] + self.config.premature_insert_penalty
849
+ )
850
+ if (
851
+ (not is_bag and not is_cloth)
852
+ and "maintain_gap" in name
853
+ and not maintain_ready
854
+ and self.config.premature_maintain_penalty > 0.0
855
+ ):
856
+ bonus = bonus - self.config.premature_maintain_penalty
857
+ stage_penalty[batch_idx, candidate_idx] = (
858
+ stage_penalty[batch_idx, candidate_idx] + self.config.premature_maintain_penalty
859
+ )
860
+ if is_bag and (batch_mouth[batch_idx] < 0.18 or batch_access[batch_idx] < 0.15) and "widen_mouth" in name:
861
+ stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.5
862
+ if is_cloth and (batch_layer[batch_idx] < 0.12 or batch_visibility[batch_idx] < 0.05) and any(
863
+ token in name for token in ("lift_edge", "separate_layer")
864
+ ):
865
+ stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.5
866
+ mode_bias[batch_idx, candidate_idx] = bonus
867
+ utility = utility + mode_bias
868
+ utility = utility + 0.5 * fold_preservation - 0.5 * lift_risk
869
+ utility = utility - stage_penalty
870
+ utility = utility - gate_outputs["penalties"]
871
+ allowed_mask = gate_outputs["allowed_mask"]
872
+ assert isinstance(allowed_mask, Tensor)
873
+ utility = utility.masked_fill(~allowed_mask, -1e6)
874
+ best_indices = utility.argmax(dim=-1)
875
+ best_chunk = candidate_chunks[batch_indices, best_indices]
876
+ return {
877
+ "best_indices": best_indices,
878
+ "best_chunk": best_chunk,
879
+ "utility_scores": utility,
880
+ "utility_total": utility,
881
+ "planner_success_logits": reranker["planner_success_logits"],
882
+ "planner_risk_values": reranker["planner_risk_values"],
883
+ "adapter_confidence": gate_outputs["adapter_confidence"],
884
+ "allowed_mask": gate_outputs["allowed_mask"],
885
+ "reject_diagnostics": gate_outputs["reject_diagnostics"],
886
+ "planning_mode": planning_mode,
887
+ }
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/losses.py ADDED
@@ -0,0 +1,855 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from torch import Tensor
8
+
9
+ from models.reveal_head import TASK_METRIC_NAMES, task_metric_valid_mask
10
+
11
+
12
@dataclass
class LossWeights:
    """Scalar multipliers for every term of the total training loss.

    A weight of 0.0 disables the corresponding term. The two non-float fields
    configure how the proposal-mode term is applied rather than weighting it.
    """

    # Behaviour-cloning loss on the predicted action chunk.
    action: float = 1.0
    # Reveal-state head terms.
    phase: float = 0.05
    arm_role: float = 0.2
    support_mode: float = 0.1
    corridor: float = 0.1
    persistence: float = 0.05
    disturbance: float = 0.05
    # World-model rollout consistency.
    world_model: float = 0.1
    # Spatial-field heads (BCE against resized target maps).
    belief: float = 0.05
    visibility: float = 0.05
    clearance: float = 0.05
    support_stability: float = 0.05
    reocclusion: float = 0.05
    occluder_contact: float = 0.05
    grasp_affordance: float = 0.05
    # Planner reranker supervision.
    planner_success: float = 0.1
    planner_risk: float = 0.05
    planner_ranking: float = 0.05
    # Proposal head supervision.
    proposal_reconstruction: float = 0.1
    proposal_success: float = 0.05
    proposal_ranking: float = 0.05
    proposal_mode: float = 0.05
    # When True, presumably restricts the proposal-mode term to cloth tasks —
    # TODO confirm against the trainer that consumes these flags.
    proposal_mode_cloth_only: bool = False
    # Optional list of task names that contribute to the proposal-mode term;
    # None means no filtering (presumed — verify in trainer).
    proposal_mode_task_filter: list[str] | None = None
    proposal_diversity: float = 0.05
    role_swap_consistency: float = 0.05
    task_metrics: float = 0.05
    # Terms below default to 0.0 (disabled unless explicitly enabled).
    transition: float = 0.0
    gate: float = 0.0
    distillation: float = 0.0
    calibration: float = 0.0
45
+
46
+
47
+ def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | None = None) -> Tensor:
48
+ loss = F.smooth_l1_loss(pred_actions, target_actions, reduction="none")
49
+ if mask is not None:
50
+ loss = loss * mask.unsqueeze(-1)
51
+ return loss.sum() / mask.sum().clamp_min(1.0)
52
+ return loss.mean()
53
+
54
+
55
+ def _command_probability(command: Tensor) -> Tensor:
56
+ return (torch.tanh(command) + 1.0) * 0.5
57
+
58
+
59
def infer_phase_targets_from_actions(action_chunk: Tensor) -> Tensor:
    """Heuristically label each action step with a phase id in {0..4}.

    Channels used: 0 (gripper open command), 8 (actor reach), 13 (retrieve).
    Label meanings follow the original thresholds: 1 = reveal (open > 0.35),
    2 = hold (reaching while nothing else fires), 3 = retrieve (prob >= 0.55),
    4 = recover (open <= -0.10), 0 = default/idle.
    """
    open_cmd = action_chunk[..., 0]
    reach_prob = _command_probability(action_chunk[..., 8])
    retrieve_prob = _command_probability(action_chunk[..., 13])

    is_retrieve = retrieve_prob >= 0.55
    is_recover = open_cmd <= -0.10
    is_reveal = open_cmd > 0.35
    is_hold = ~(is_retrieve | is_recover | is_reveal) & (reach_prob >= 0.55)

    labels = torch.zeros_like(open_cmd, dtype=torch.long)
    # Later assignments win, so precedence is recover > retrieve > hold > reveal.
    for condition, label in ((is_reveal, 1), (is_hold, 2), (is_retrieve, 3), (is_recover, 4)):
        labels = torch.where(condition, torch.full_like(labels, label), labels)
    return labels
75
+
76
+
77
+ def _role_targets_like(arm_role_logits: Tensor) -> Tensor:
78
+ role_target = torch.as_tensor([1, 2], device=arm_role_logits.device, dtype=torch.long)
79
+ expand_shape = [1] * (arm_role_logits.ndim - 2) + [2]
80
+ return role_target.view(*expand_shape).expand(*arm_role_logits.shape[:-1])
81
+
82
+
83
def swap_arm_actions(action_chunk: Tensor) -> Tensor:
    """Exchange the two arm halves of the last (action) dimension.

    For an odd-sized last dim the second "half" (the larger one) moves first,
    matching the original floor-division split.
    """
    half = action_chunk.shape[-1] // 2
    first_arm, second_arm = action_chunk[..., :half], action_chunk[..., half:]
    return torch.cat((second_arm, first_arm), dim=-1)
86
+
87
+
88
def permutation_invariant_role_loss(arm_role_logits: Tensor) -> Tensor:
    """Cross-entropy on arm-role logits, invariant to swapping the two arms.

    Computes the summed per-arm CE against the canonical [1, 2] targets and
    against the swapped [2, 1] targets, and takes the per-sample minimum.
    """
    targets = _role_targets_like(arm_role_logits)
    swapped = targets.flip(-1)
    logits_2d = arm_role_logits.reshape(-1, arm_role_logits.shape[-1])

    def _summed_ce(labels: Tensor) -> Tensor:
        # Per-element CE, reshaped back and summed over the arm dimension.
        ce = F.cross_entropy(logits_2d, labels.reshape(-1), reduction="none")
        return ce.view(*labels.shape).sum(dim=-1)

    return torch.minimum(_summed_ce(targets), _summed_ce(swapped)).mean()
95
+
96
+
97
def role_swap_consistency_loss(pred: Tensor, target: Tensor) -> Tensor:
    """Mean smooth-L1 distance between swap-consistent predictions and targets."""
    consistency = F.smooth_l1_loss(pred, target)
    return consistency
99
+
100
+
101
def proposal_diversity_loss(proposal_candidates: Tensor, minimum_distance: float = 0.05) -> Tensor:
    """Hinge penalty pushing proposal candidates at least `minimum_distance` apart.

    Expects a (batch, candidates, steps, dims) tensor; anything else, or a
    single candidate, contributes zero. Distances are pairwise L1 between
    flattened candidates, with the diagonal (self-distances) excluded.
    """
    zero = proposal_candidates.new_tensor(0.0)
    if proposal_candidates.ndim != 4 or proposal_candidates.shape[1] <= 1:
        return zero
    flattened = proposal_candidates.flatten(start_dim=2)
    pairwise = torch.cdist(flattened, flattened, p=1)
    count = pairwise.shape[-1]
    diagonal = torch.eye(count, device=pairwise.device, dtype=torch.bool).unsqueeze(0)
    off_diagonal = (~diagonal).expand(pairwise.shape[0], -1, -1)
    if not off_diagonal.any():
        return zero
    return torch.relu(minimum_distance - pairwise[off_diagonal]).mean()
111
+
112
+
113
def proposal_set_reconstruction_loss(proposal_candidates: Tensor, target_candidates: Tensor) -> Tensor:
    """Symmetric Chamfer-style set loss between proposal and target chunks.

    Both inputs must be (batch, candidates, steps, dims); otherwise, or when
    either set is empty, the loss is zero. Pairwise L1 distances between
    flattened chunks are normalised by the flattened feature size, and the
    loss averages nearest-neighbour distances in both directions.
    """
    if proposal_candidates.ndim != 4 or target_candidates.ndim != 4:
        return proposal_candidates.new_tensor(0.0)
    if proposal_candidates.shape[1] == 0 or target_candidates.shape[1] == 0:
        return proposal_candidates.new_tensor(0.0)
    proposals = proposal_candidates.flatten(start_dim=2)
    targets = target_candidates.flatten(start_dim=2).to(dtype=proposals.dtype)
    normaliser = float(max(1, proposals.shape[-1]))
    pairwise = torch.cdist(proposals, targets, p=1) / normaliser
    forward = pairwise.min(dim=-1).values.mean()
    backward = pairwise.min(dim=-2).values.mean()
    return 0.5 * (forward + backward)
122
+
123
+
124
+ def _proposal_target_batch(batch: dict[str, Tensor]) -> tuple[Tensor | None, Tensor | None, Tensor | None, Tensor | None]:
125
+ proposal_chunks = batch.get("proposal_target_action_chunks")
126
+ if proposal_chunks is None:
127
+ proposal_chunks = batch.get("candidate_action_chunks")
128
+ proposal_success = batch.get("proposal_target_retrieval_success")
129
+ if proposal_success is None:
130
+ proposal_success = batch.get("candidate_retrieval_success")
131
+ proposal_risk = batch.get("proposal_target_risk")
132
+ if proposal_risk is None:
133
+ proposal_risk = batch.get("candidate_risk")
134
+ proposal_utility = batch.get("proposal_target_utility")
135
+ if proposal_utility is None:
136
+ proposal_utility = batch.get("candidate_utility")
137
+ return proposal_chunks, proposal_success, proposal_risk, proposal_utility
138
+
139
+
140
def _proposal_mode_targets(
    proposal_mode_assignments: Tensor,
    proposal_success: Tensor,
    proposal_utility: Tensor,
    num_modes: int,
) -> tuple[Tensor, Tensor]:
    """Aggregate per-candidate success/utility into per-mode targets.

    Args:
        proposal_mode_assignments: per-candidate mode index; negative values
            mean "unassigned". Only the first `candidate_count` entries are
            used after flattening.
        proposal_success: (batch, candidate) success scores.
        proposal_utility: (batch, candidate) utility scores.
        num_modes: number of discrete proposal modes.

    Returns:
        (mode_success, mode_utility), both (batch, num_modes). A mode with no
        assigned candidate keeps success 0 and utility -1e6 (effective -inf).
    """
    batch_size, candidate_count = proposal_success.shape
    mode_assignments = proposal_mode_assignments.view(-1)[:candidate_count].long().to(device=proposal_success.device)
    mode_success = torch.zeros(batch_size, num_modes, dtype=proposal_success.dtype, device=proposal_success.device)
    # -1e6 marks "no candidate in this mode" so max/argmax consumers skip it.
    mode_utility = torch.full(
        (batch_size, num_modes),
        fill_value=-1e6,
        dtype=proposal_utility.dtype,
        device=proposal_utility.device,
    )
    valid_assignment_mask = mode_assignments >= 0
    for mode_idx in range(num_modes):
        mask = mode_assignments == mode_idx
        if not torch.any(mask):
            continue
        # The best candidate within a mode defines that mode's target.
        mode_success[:, mode_idx] = proposal_success[:, mask].amax(dim=1)
        mode_utility[:, mode_idx] = proposal_utility[:, mask].amax(dim=1)
    # Per-sample "no usable mode" flag: no candidate is assigned anywhere
    # (note: `.any()` is a 0-d tensor broadcast over the batch), or every
    # mode-level success sums to ~0 for that sample.
    no_mode = torch.logical_or(
        ~valid_assignment_mask.any(),
        torch.isclose(mode_success.sum(dim=1), mode_success.new_zeros(batch_size)),
    )
    if torch.any(no_mode):
        # Neutralise the -1e6 sentinels so such samples contribute 0 utility.
        mode_utility[no_mode] = 0.0
    return mode_success, mode_utility
169
+
170
+
171
+ def _proposal_reconstruction_targets(
172
+ batch: dict[str, Tensor],
173
+ proposal_count: int,
174
+ fallback_targets: Tensor | None,
175
+ ) -> Tensor | None:
176
+ task_name = batch.get("task_name")
177
+ if isinstance(task_name, str) and task_name == "bag" and fallback_targets is not None:
178
+ return fallback_targets
179
+ teacher_candidates = batch.get("candidate_action_chunks")
180
+ teacher_utility = batch.get("candidate_utility")
181
+ if teacher_candidates is None:
182
+ return fallback_targets
183
+ if teacher_utility is None or teacher_candidates.shape[1] <= 1:
184
+ return teacher_candidates
185
+ top_k = min(teacher_candidates.shape[1], max(1, proposal_count // 2))
186
+ top_indices = teacher_utility.topk(k=top_k, dim=1).indices
187
+ gather_index = top_indices[..., None, None].expand(
188
+ -1,
189
+ -1,
190
+ teacher_candidates.shape[2],
191
+ teacher_candidates.shape[3],
192
+ )
193
+ return teacher_candidates.gather(1, gather_index)
194
+
195
+
196
def _task_name_mask(batch: dict[str, Tensor | list[str] | tuple[str, ...] | str], task_name: str, batch_size: int, device: torch.device) -> Tensor | None:
    """Per-sample boolean mask selecting samples whose task name equals `task_name`.

    Thin convenience wrapper around _task_name_mask_for_values with a
    single-element name list.
    """
    single_name = [task_name]
    return _task_name_mask_for_values(batch, single_name, batch_size=batch_size, device=device)
198
+
199
+
200
+ def _task_name_mask_for_values(
201
+ batch: dict[str, Tensor | list[str] | tuple[str, ...] | str],
202
+ task_names: list[str] | tuple[str, ...],
203
+ batch_size: int,
204
+ device: torch.device,
205
+ ) -> Tensor | None:
206
+ target_names = {str(name) for name in task_names}
207
+ task_names = batch.get("task_name")
208
+ if isinstance(task_names, str):
209
+ return torch.full((batch_size,), task_names in target_names, dtype=torch.bool, device=device)
210
+ if isinstance(task_names, (list, tuple)):
211
+ if len(task_names) < batch_size:
212
+ return None
213
+ values = [str(task_names[idx]) in target_names for idx in range(batch_size)]
214
+ return torch.as_tensor(values, dtype=torch.bool, device=device)
215
+ return None
216
+
217
+
218
+ def _resize_like(target: Tensor, prediction: Tensor) -> Tensor:
219
+ if target.shape == prediction.shape:
220
+ return target
221
+ if target.ndim == prediction.ndim == 4:
222
+ return F.interpolate(target.float(), size=prediction.shape[-2:], mode="bilinear", align_corners=False)
223
+ if target.ndim == 3 and prediction.ndim == 4:
224
+ return F.interpolate(target.unsqueeze(1).float(), size=prediction.shape[-2:], mode="bilinear", align_corners=False)
225
+ return target
226
+
227
+
228
def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
    """Per-head supervision losses for the reveal/interaction state predictor.

    Returns a dict of scalar losses keyed by head name ("phase", "arm_role",
    "support_mode", the spatial-field heads, "task_metrics", "calibration",
    ...). Heads absent from `pred`, or lacking a target, contribute a zero
    tensor so callers can weight every key unconditionally.

    NOTE(review): `weights` is accepted but never read here — the weighting
    happens in the caller; confirm whether the parameter is kept only for
    interface stability.
    """
    losses = {}
    # --- discrete phase head --------------------------------------------
    if "phase_logits" in pred:
        if "phase" in target:
            phase_target = target["phase"].long()
        else:
            action_chunk = target.get("action_chunk")
            if action_chunk is not None:
                # Derive phase labels from the first step of the action chunk.
                phase_target = infer_phase_targets_from_actions(action_chunk[:, 0])
            else:
                # Last resort: fixed support_mode -> phase mapping (0->2, 1->3, 2->0).
                phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long)
                phase_target = phase_map[target["support_mode"].long()]
        losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
    else:
        losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- arm-role head: swap-invariant CE plus a separation hinge --------
    if "arm_role_logits" in pred:
        role_ce = permutation_invariant_role_loss(pred["arm_role_logits"])
        role_probs = pred["arm_role_logits"].softmax(dim=-1)
        # Penalise the two arms' role distributions for being closer than a
        # 0.25 mean absolute gap.
        role_gap = torch.mean(torch.abs(role_probs[:, 0] - role_probs[:, 1]), dim=-1)
        role_separation = torch.relu(0.25 - role_gap).mean()
        losses["arm_role"] = role_ce + 0.5 * role_separation
    else:
        losses["arm_role"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- required scalar heads -------------------------------------------
    support_target = target["support_mode"].long()
    losses["support_mode"] = F.cross_entropy(pred["support_mode_logits"], support_target)
    losses["corridor"] = F.binary_cross_entropy_with_logits(
        pred["corridor_logits"],
        target["corridor_feasible"].float(),
    )
    losses["persistence"] = F.mse_loss(pred["persistence_horizon"], target["persistence_horizon"].float())
    losses["disturbance"] = F.mse_loss(pred["disturbance_cost"], target["disturbance_cost"].float())
    # --- optional spatial maps: BCE after resizing the target to the
    # prediction's spatial size (see _resize_like) -------------------------
    if "belief_map" in pred and "belief_map" in target:
        losses["belief"] = F.binary_cross_entropy_with_logits(pred["belief_map"], _resize_like(target["belief_map"].float(), pred["belief_map"]))
    else:
        losses["belief"] = pred["support_mode_logits"].new_tensor(0.0)
    if "visibility_field" in pred and "visibility_map" in target:
        losses["visibility"] = F.binary_cross_entropy_with_logits(
            pred["visibility_field"],
            _resize_like(target["visibility_map"].float(), pred["visibility_field"]),
        )
    else:
        losses["visibility"] = pred["support_mode_logits"].new_tensor(0.0)
    if "clearance_field" in pred and "clearance_map" in target:
        losses["clearance"] = F.binary_cross_entropy_with_logits(
            pred["clearance_field"],
            _resize_like(target["clearance_map"].float(), pred["clearance_field"]),
        )
    else:
        losses["clearance"] = pred["support_mode_logits"].new_tensor(0.0)
    if "support_stability_field" in pred and "support_stability_map" in target:
        losses["support_stability"] = F.binary_cross_entropy_with_logits(
            pred["support_stability_field"],
            _resize_like(target["support_stability_map"].float(), pred["support_stability_field"]),
        )
    else:
        losses["support_stability"] = pred["support_mode_logits"].new_tensor(0.0)
    if "occluder_contact_field" in pred and "occluder_contact_map" in target:
        losses["occluder_contact"] = F.binary_cross_entropy_with_logits(
            pred["occluder_contact_field"],
            _resize_like(target["occluder_contact_map"].float(), pred["occluder_contact_field"]),
        )
    else:
        losses["occluder_contact"] = pred["support_mode_logits"].new_tensor(0.0)
    if "grasp_affordance_field" in pred and "grasp_affordance_map" in target:
        losses["grasp_affordance"] = F.binary_cross_entropy_with_logits(
            pred["grasp_affordance_field"],
            _resize_like(target["grasp_affordance_map"].float(), pred["grasp_affordance_field"]),
        )
    else:
        losses["grasp_affordance"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- reocclusion head --------------------------------------------------
    if "reocclusion_logit" in pred and "corridor_feasible" in target:
        target_reocclusion = target.get("reocclusion_target")
        if target_reocclusion is None:
            # Proxy target: complement of the best corridor feasibility.
            target_reocclusion = 1.0 - target["corridor_feasible"].float().amax(dim=-1)
        if target_reocclusion.ndim < pred["reocclusion_logit"].ndim:
            target_reocclusion = target_reocclusion.unsqueeze(-1).expand_as(pred["reocclusion_logit"])
        losses["reocclusion"] = F.binary_cross_entropy_with_logits(pred["reocclusion_logit"], target_reocclusion)
    else:
        losses["reocclusion"] = pred["support_mode_logits"].new_tensor(0.0)
    # Unsupervised regulariser: simply shrink the predicted uncertainty.
    if "persistence_uncertainty" in pred:
        losses["uncertainty"] = pred["persistence_uncertainty"].mean()
    else:
        losses["uncertainty"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- per-task scalar metrics -------------------------------------------
    task_metric_pairs = tuple(TASK_METRIC_NAMES)
    metric_mask = target.get("task_metric_mask")
    if metric_mask is None:
        # Derive a (batch, metric) validity mask from the task names when the
        # batch does not carry an explicit mask.
        target_task_names = target.get("task_name")
        if isinstance(target_task_names, (list, tuple, str)):
            metric_mask = task_metric_valid_mask(
                [str(name) for name in target_task_names] if not isinstance(target_task_names, str) else [target_task_names] * pred["support_mode_logits"].shape[0],
                device=pred["support_mode_logits"].device,
                batch_size=pred["support_mode_logits"].shape[0],
            )
    task_losses = []
    for metric_idx, key in enumerate(task_metric_pairs):
        if key not in pred or key not in target:
            continue
        if metric_mask is None:
            # No validity info: plain MSE over all samples.
            task_losses.append(F.mse_loss(pred[key].float(), target[key].float()))
            continue
        per_sample = F.mse_loss(pred[key].float(), target[key].float(), reduction="none")
        # Reduce any trailing dims so per_sample is one value per batch item.
        while per_sample.ndim > 1:
            per_sample = per_sample.mean(dim=-1)
        valid = metric_mask[:, metric_idx].to(dtype=per_sample.dtype)
        if valid.sum() <= 0:
            # Metric applies to no sample in this batch; skip it entirely.
            continue
        task_losses.append((per_sample * valid).sum() / valid.sum().clamp_min(1.0))
    losses["task_metrics"] = (
        torch.stack(task_losses).mean()
        if task_losses
        else pred["support_mode_logits"].new_tensor(0.0)
    )
    # --- confidence calibration head ----------------------------------------
    if "state_confidence_logit" in pred and "state_confidence_target" in target:
        losses["calibration"] = F.binary_cross_entropy_with_logits(
            pred["state_confidence_logit"],
            target["state_confidence_target"].float(),
        )
    else:
        losses["calibration"] = pred["support_mode_logits"].new_tensor(0.0)
    return losses
348
+
349
+
350
def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target_rollout: dict[str, Tensor]) -> Tensor:
    """Consistency loss between a predicted world-model rollout and targets.

    Both rollouts are clipped to a shared horizon; per-batch targets are
    optionally expanded over a candidate dimension; the returned scalar sums
    cross-entropy (support mode), BCE (corridor feasibility) and MSE
    (persistence, disturbance) terms.
    """
    # A 4-D support-mode logit tensor implies a candidate dimension at dim 1.
    has_candidates = pred_rollout["support_mode_logits"].ndim == 4
    candidate_dim = pred_rollout["support_mode_logits"].shape[1] if has_candidates else 1

    def _expand_target(value: Tensor) -> Tensor:
        # Broadcast a per-batch target across the candidate dim when needed.
        if not has_candidates:
            return value
        if value.ndim >= 2 and value.shape[1] == candidate_dim:
            return value
        return value.unsqueeze(1).expand(-1, candidate_dim, *value.shape[1:])

    def _resize_rollout_target_like(target_value: Tensor, pred_value: Tensor) -> Tensor:
        # Bilinearly resize spatial targets to the prediction's (H, W); 6-D
        # and 5-D tensors are flattened over their leading dims first.
        if target_value.shape == pred_value.shape:
            return target_value
        if pred_value.ndim == 6:
            flat_target = target_value.reshape(-1, target_value.shape[-3], target_value.shape[-2], target_value.shape[-1])
            flat_pred = pred_value.reshape(-1, pred_value.shape[-3], pred_value.shape[-2], pred_value.shape[-1])
            resized = _resize_like(flat_target.float(), flat_pred)
            return resized.reshape(*pred_value.shape[:-3], pred_value.shape[-3], pred_value.shape[-2], pred_value.shape[-1])
        if pred_value.ndim == 5:
            flat_target = target_value.reshape(-1, target_value.shape[-2], target_value.shape[-1])
            if flat_target.shape[-2:] != pred_value.shape[-2:]:
                flat_target = F.interpolate(
                    flat_target.unsqueeze(1).float(),
                    size=pred_value.shape[-2:],
                    mode="bilinear",
                    align_corners=False,
                ).squeeze(1)
            return flat_target.reshape(*pred_value.shape[:-2], pred_value.shape[-2], pred_value.shape[-1])
        return target_value

    # Shared horizon between prediction and target rollouts.
    horizon = min(
        pred_rollout["support_mode_logits"].shape[-2],
        target_rollout["support_mode"].shape[-1],
    )
    # NOTE(review): rebinding pred_rollout / target_rollout to these trimmed
    # dicts drops every optional key, so the membership checks further down
    # ("phase" in target_rollout, "phase_logits" / "arm_role_logits" in
    # pred_rollout, and the optional_pairs loop) test the trimmed dicts and
    # can never fire. Confirm whether those branches were meant to consult
    # the original function arguments instead.
    pred_rollout = {
        "support_mode_logits": pred_rollout["support_mode_logits"][..., :horizon, :],
        "corridor_logits": pred_rollout["corridor_logits"][..., :horizon, :, :],
        "persistence_horizon": pred_rollout["persistence_horizon"][..., :horizon, :],
        "disturbance_cost": pred_rollout["disturbance_cost"][..., :horizon],
    }
    target_rollout = {
        "support_mode": _expand_target(target_rollout["support_mode"][..., :horizon]),
        "corridor_feasible": _expand_target(target_rollout["corridor_feasible"][..., :horizon, :, :]),
        "persistence_horizon": _expand_target(target_rollout["persistence_horizon"][..., :horizon, :]),
        "disturbance_cost": _expand_target(target_rollout["disturbance_cost"][..., :horizon]),
        "action_chunk": _expand_target(target_rollout["action_chunk"][..., :horizon, :]),
    }
    # Dead branch — see NOTE above: the rebuilt dict never contains "phase".
    if "phase" in target_rollout:
        target_rollout["phase"] = _expand_target(target_rollout["phase"][..., :horizon])
    corridor_target = _resize_rollout_target_like(
        target_rollout["corridor_feasible"],
        pred_rollout["corridor_logits"],
    )
    loss = (
        F.cross_entropy(
            pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]),
            target_rollout["support_mode"].reshape(-1).long(),
        )
        + F.binary_cross_entropy_with_logits(
            pred_rollout["corridor_logits"],
            corridor_target.float(),
        )
        + F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float())
        + F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float())
    )
    # Dead branch — see NOTE above: "phase_logits" was dropped by the rebind.
    if "phase_logits" in pred_rollout:
        phase_target = target_rollout.get("phase")
        if phase_target is None:
            phase_target = infer_phase_targets_from_actions(target_rollout["action_chunk"])
        loss = loss + 0.5 * F.cross_entropy(
            pred_rollout["phase_logits"].reshape(-1, pred_rollout["phase_logits"].shape[-1]),
            phase_target.reshape(-1),
        )
    # Dead branch — see NOTE above: "arm_role_logits" was dropped by the rebind.
    if "arm_role_logits" in pred_rollout:
        loss = loss + 0.25 * permutation_invariant_role_loss(pred_rollout["arm_role_logits"])
    # (pred field key, target map key (unused), target rollout key)
    optional_pairs = (
        ("target_belief_field", "belief_map", "rollout_belief_map"),
        ("visibility_field", "visibility_map", "rollout_visibility_map"),
        ("clearance_field", "clearance_map", "rollout_clearance_map"),
        ("support_stability_field", "support_stability_map", "rollout_support_stability"),
        ("occluder_contact_field", "occluder_contact_map", "rollout_occluder_contact_map"),
        ("grasp_affordance_field", "grasp_affordance_map", "rollout_grasp_affordance_map"),
        ("reocclusion_field", "reocclusion_map", "rollout_reocclusion_target"),
    )
    # Dead loop body — see NOTE above: none of these keys survive the rebinds.
    # NOTE(review): if this loop ever executed, the double-Ellipsis index
    # `[..., :horizon, ...]` below would raise at runtime (a tensor index may
    # contain only a single Ellipsis).
    for pred_key, _, target_key in optional_pairs:
        if pred_key not in pred_rollout or target_key not in target_rollout:
            continue
        target_value = _expand_target(target_rollout[target_key][..., :horizon, ...])
        pred_value = pred_rollout[pred_key][..., :horizon, :, :, :] if pred_rollout[pred_key].ndim >= 6 else pred_rollout[pred_key][..., :horizon, :, :]
        while target_value.ndim < pred_value.ndim:
            target_value = target_value.unsqueeze(-1)
        if pred_value.ndim >= 5:
            target_value = _resize_rollout_target_like(target_value, pred_value)
        loss = loss + 0.1 * F.binary_cross_entropy_with_logits(pred_value, target_value.float())
    return loss
445
+ return loss
446
+
447
+
448
+ def compute_total_loss(
449
+ model_output: dict[str, Tensor],
450
+ batch: dict[str, Tensor],
451
+ weights: LossWeights | None = None,
452
+ ) -> dict[str, Tensor]:
453
+ weights = weights or LossWeights()
454
+ losses = {
455
+ "action": chunk_bc_loss(
456
+ model_output["action_mean"],
457
+ batch["action_chunk"],
458
+ mask=batch.get("action_mask"),
459
+ ),
460
+ }
461
+ total = weights.action * losses["action"]
462
+
463
+ state_output = model_output.get("interaction_state")
464
+ if state_output is None:
465
+ state_output = model_output.get("reveal_state")
466
+
467
+ if state_output is not None and "support_mode" in batch:
468
+ reveal_losses = reveal_state_loss(state_output, batch, weights)
469
+ losses.update(reveal_losses)
470
+ total = (
471
+ total
472
+ + weights.phase * reveal_losses["phase"]
473
+ + weights.arm_role * reveal_losses["arm_role"]
474
+ + weights.support_mode * reveal_losses["support_mode"]
475
+ + weights.corridor * reveal_losses["corridor"]
476
+ + weights.persistence * reveal_losses["persistence"]
477
+ + weights.disturbance * reveal_losses["disturbance"]
478
+ + weights.belief * reveal_losses["belief"]
479
+ + weights.visibility * reveal_losses["visibility"]
480
+ + weights.clearance * reveal_losses["clearance"]
481
+ + weights.support_stability * reveal_losses["support_stability"]
482
+ + weights.occluder_contact * reveal_losses["occluder_contact"]
483
+ + weights.grasp_affordance * reveal_losses["grasp_affordance"]
484
+ + weights.reocclusion * reveal_losses["reocclusion"]
485
+ + weights.task_metrics * reveal_losses["task_metrics"]
486
+ + weights.calibration * reveal_losses["calibration"]
487
+ + 0.01 * reveal_losses["uncertainty"]
488
+ )
489
+
490
+ if model_output.get("planned_rollout") and model_output.get("rollout_source", "learned") in {"learned", "lightweight"} and (
491
+ "proposal_target_rollout_support_mode" in batch
492
+ or "candidate_rollout_support_mode" in batch
493
+ or "rollout_support_mode" in batch
494
+ ):
495
+ if "proposal_target_rollout_support_mode" in batch:
496
+ rollout_target = {
497
+ "support_mode": batch["proposal_target_rollout_support_mode"],
498
+ "corridor_feasible": batch["proposal_target_rollout_corridor_feasible"],
499
+ "persistence_horizon": batch["proposal_target_rollout_persistence_horizon"],
500
+ "disturbance_cost": batch["proposal_target_rollout_disturbance_cost"],
501
+ "action_chunk": batch["proposal_target_action_chunks"],
502
+ }
503
+ if "proposal_target_rollout_phase" in batch:
504
+ rollout_target["phase"] = batch["proposal_target_rollout_phase"]
505
+ for optional_key in (
506
+ "proposal_target_rollout_belief_map",
507
+ "proposal_target_rollout_visibility_map",
508
+ "proposal_target_rollout_clearance_map",
509
+ "proposal_target_rollout_support_stability",
510
+ "proposal_target_rollout_reocclusion_target",
511
+ "proposal_target_rollout_occluder_contact_map",
512
+ "proposal_target_rollout_grasp_affordance_map",
513
+ ):
514
+ if optional_key in batch:
515
+ rollout_target[optional_key.replace("proposal_target_", "")] = batch[optional_key]
516
+ elif "candidate_rollout_support_mode" in batch:
517
+ rollout_target = {
518
+ "support_mode": batch["candidate_rollout_support_mode"],
519
+ "corridor_feasible": batch["candidate_rollout_corridor_feasible"],
520
+ "persistence_horizon": batch["candidate_rollout_persistence_horizon"],
521
+ "disturbance_cost": batch["candidate_rollout_disturbance_cost"],
522
+ "action_chunk": batch["candidate_action_chunks"],
523
+ }
524
+ if "candidate_rollout_phase" in batch:
525
+ rollout_target["phase"] = batch["candidate_rollout_phase"]
526
+ for optional_key in (
527
+ "candidate_rollout_belief_map",
528
+ "candidate_rollout_visibility_map",
529
+ "candidate_rollout_clearance_map",
530
+ "candidate_rollout_support_stability",
531
+ "candidate_rollout_reocclusion_target",
532
+ "candidate_rollout_occluder_contact_map",
533
+ "candidate_rollout_grasp_affordance_map",
534
+ ):
535
+ if optional_key in batch:
536
+ rollout_target[optional_key.replace("candidate_", "")] = batch[optional_key]
537
+ planner_indices = model_output.get("planner_topk_indices")
538
+ if planner_indices is not None:
539
+ for key, value in list(rollout_target.items()):
540
+ if isinstance(value, Tensor) and value.ndim >= 2 and value.shape[1] >= planner_indices.shape[1]:
541
+ expand_indices = planner_indices
542
+ while expand_indices.ndim < value.ndim:
543
+ expand_indices = expand_indices.unsqueeze(-1)
544
+ rollout_target[key] = value.gather(
545
+ 1,
546
+ expand_indices.expand(-1, -1, *value.shape[2:]),
547
+ )
548
+ else:
549
+ rollout_target = {
550
+ "support_mode": batch["rollout_support_mode"],
551
+ "corridor_feasible": batch["rollout_corridor_feasible"],
552
+ "persistence_horizon": batch["rollout_persistence_horizon"],
553
+ "disturbance_cost": batch["rollout_disturbance_cost"],
554
+ "action_chunk": batch["action_chunk"],
555
+ }
556
+ if "rollout_phase" in batch:
557
+ rollout_target["phase"] = batch["rollout_phase"]
558
+ for optional_key in (
559
+ "rollout_belief_map",
560
+ "rollout_visibility_map",
561
+ "rollout_clearance_map",
562
+ "rollout_support_stability",
563
+ "rollout_reocclusion_target",
564
+ "rollout_occluder_contact_map",
565
+ "rollout_grasp_affordance_map",
566
+ ):
567
+ if optional_key in batch:
568
+ rollout_target[optional_key] = batch[optional_key]
569
+ world_model_loss = world_model_rollout_consistency_loss(
570
+ model_output["planned_rollout"],
571
+ rollout_target,
572
+ )
573
+ if model_output.get("rollout_source", "learned") == "lightweight":
574
+ losses["transition"] = world_model_loss
575
+ losses["world_model"] = model_output["action_mean"].new_tensor(0.0)
576
+ total = total + weights.transition * world_model_loss
577
+ else:
578
+ losses["world_model"] = world_model_loss
579
+ losses["transition"] = model_output["action_mean"].new_tensor(0.0)
580
+ total = total + weights.world_model * world_model_loss
581
+ else:
582
+ losses["world_model"] = model_output["action_mean"].new_tensor(0.0)
583
+ losses["transition"] = model_output["action_mean"].new_tensor(0.0)
584
+
585
+ if "planner_success_logits" in model_output and "candidate_retrieval_success" in batch:
586
+ success_target = batch["candidate_retrieval_success"].float()
587
+ risk_target = batch.get("candidate_risk")
588
+ if risk_target is None:
589
+ risk_target = torch.clamp(
590
+ batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(),
591
+ 0.0,
592
+ 1.0,
593
+ )
594
+ utility_target = batch.get("candidate_utility")
595
+ planner_indices = model_output.get("planner_topk_indices")
596
+ if planner_indices is not None and success_target.shape[1] != model_output["planner_success_logits"].shape[1]:
597
+ success_target = success_target.gather(1, planner_indices)
598
+ risk_target = risk_target.gather(1, planner_indices)
599
+ if utility_target is not None:
600
+ utility_target = utility_target.gather(1, planner_indices)
601
+ if utility_target is None:
602
+ utility_target = success_target - risk_target
603
+ elif "utility_structured" in model_output:
604
+ utility_target = 0.5 * utility_target + 0.5 * model_output["utility_structured"].detach()
605
+ planner_scores = model_output["planner_scores"].float().clamp(-20.0, 20.0)
606
+ success_loss = F.binary_cross_entropy_with_logits(model_output["planner_success_logits"], success_target)
607
+ risk_loss = F.mse_loss(model_output["planner_risk_values"], risk_target.float())
608
+ pred_diff = planner_scores.unsqueeze(-1) - planner_scores.unsqueeze(-2)
609
+ target_diff = utility_target.float().unsqueeze(-1) - utility_target.float().unsqueeze(-2)
610
+ ranking_mask = target_diff.abs() > 1e-4
611
+ if ranking_mask.any():
612
+ ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean()
613
+ else:
614
+ ranking_loss = planner_scores.new_tensor(0.0)
615
+ oracle_target = utility_target.argmax(dim=-1)
616
+ oracle_loss = F.cross_entropy(planner_scores, oracle_target)
617
+ ranking_loss = ranking_loss + 0.5 * oracle_loss
618
+ losses["planner_success"] = success_loss
619
+ losses["planner_risk"] = risk_loss
620
+ losses["planner_ranking"] = ranking_loss
621
+ total = (
622
+ total
623
+ + weights.planner_success * success_loss
624
+ + weights.planner_risk * risk_loss
625
+ + weights.planner_ranking * ranking_loss
626
+ )
627
+ else:
628
+ losses["planner_success"] = model_output["action_mean"].new_tensor(0.0)
629
+ losses["planner_risk"] = model_output["action_mean"].new_tensor(0.0)
630
+ losses["planner_ranking"] = model_output["action_mean"].new_tensor(0.0)
631
+
632
+ if "adapter_confidence" in model_output and "state_confidence_target" in batch:
633
+ confidence = model_output["adapter_confidence"]
634
+ if confidence.ndim > 1:
635
+ confidence = confidence.max(dim=-1).values
636
+ with torch.autocast(device_type=confidence.device.type, enabled=False):
637
+ gate_loss = F.binary_cross_entropy(
638
+ confidence.float().clamp(1e-4, 1.0 - 1e-4),
639
+ batch["state_confidence_target"].float(),
640
+ )
641
+ losses["gate"] = gate_loss
642
+ total = total + weights.gate * gate_loss
643
+ else:
644
+ losses["gate"] = model_output["action_mean"].new_tensor(0.0)
645
+
646
+ if "trunk_action_mean" in model_output:
647
+ distill_loss = chunk_bc_loss(
648
+ model_output["action_mean"],
649
+ model_output["trunk_action_mean"].detach(),
650
+ )
651
+ losses["distillation"] = distill_loss
652
+ total = total + weights.distillation * distill_loss
653
+ else:
654
+ losses["distillation"] = model_output["action_mean"].new_tensor(0.0)
655
+
656
+ proposal_target_chunks, proposal_success_values, proposal_risk_values, proposal_utility_values = _proposal_target_batch(batch)
657
+
658
+ if "proposal_candidates" in model_output:
659
+ reconstruction_losses = []
660
+ batch_size = model_output["proposal_candidates"].shape[0]
661
+ task_names = batch.get("task_name")
662
+ for sample_idx in range(batch_size):
663
+ sample_batch: dict[str, Tensor | str] = {}
664
+ for key in ("candidate_action_chunks", "candidate_utility"):
665
+ value = batch.get(key)
666
+ if value is not None:
667
+ sample_batch[key] = value[sample_idx : sample_idx + 1]
668
+ if isinstance(task_names, list):
669
+ sample_batch["task_name"] = str(task_names[sample_idx])
670
+ elif isinstance(task_names, tuple):
671
+ sample_batch["task_name"] = str(task_names[sample_idx])
672
+ elif isinstance(task_names, str):
673
+ sample_batch["task_name"] = task_names
674
+ fallback_sample = (
675
+ proposal_target_chunks[sample_idx : sample_idx + 1]
676
+ if proposal_target_chunks is not None
677
+ else None
678
+ )
679
+ sample_targets = _proposal_reconstruction_targets(
680
+ sample_batch, # type: ignore[arg-type]
681
+ proposal_count=model_output["proposal_candidates"].shape[1],
682
+ fallback_targets=fallback_sample,
683
+ )
684
+ if sample_targets is None:
685
+ continue
686
+ reconstruction_losses.append(
687
+ proposal_set_reconstruction_loss(
688
+ model_output["proposal_candidates"][sample_idx : sample_idx + 1],
689
+ sample_targets,
690
+ )
691
+ )
692
+ if reconstruction_losses:
693
+ proposal_reconstruction = torch.stack(reconstruction_losses).mean()
694
+ else:
695
+ proposal_reconstruction = model_output["action_mean"].new_tensor(0.0)
696
+ losses["proposal_reconstruction"] = proposal_reconstruction
697
+ total = total + weights.proposal_reconstruction * proposal_reconstruction
698
+ else:
699
+ losses["proposal_reconstruction"] = model_output["action_mean"].new_tensor(0.0)
700
+
701
+ if "proposal_logits" in model_output and proposal_success_values is not None:
702
+ candidate_count = min(
703
+ model_output["proposal_logits"].shape[1],
704
+ proposal_success_values.shape[1],
705
+ )
706
+ proposal_logits = model_output["proposal_logits"][:, :candidate_count]
707
+ proposal_success_target = proposal_success_values[:, :candidate_count].float()
708
+ proposal_utility = proposal_utility_values
709
+ if proposal_utility is None:
710
+ proposal_risk = proposal_risk_values
711
+ if proposal_risk is None:
712
+ proposal_risk = torch.clamp(
713
+ batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(),
714
+ 0.0,
715
+ 1.0,
716
+ )
717
+ proposal_utility = proposal_success_target - proposal_risk[:, :candidate_count]
718
+ else:
719
+ proposal_utility = proposal_utility[:, :candidate_count]
720
+ proposal_success_loss = F.binary_cross_entropy_with_logits(
721
+ proposal_logits,
722
+ proposal_success_target,
723
+ )
724
+ proposal_pred_diff = proposal_logits.unsqueeze(-1) - proposal_logits.unsqueeze(-2)
725
+ proposal_target_diff = proposal_utility.float().unsqueeze(-1) - proposal_utility.float().unsqueeze(-2)
726
+ proposal_mask = proposal_target_diff.abs() > 1e-4
727
+ if proposal_mask.any():
728
+ proposal_ranking_loss = torch.relu(0.1 - torch.sign(proposal_target_diff) * proposal_pred_diff)[
729
+ proposal_mask
730
+ ].mean()
731
+ else:
732
+ proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
733
+ proposal_oracle_target = proposal_utility.argmax(dim=-1)
734
+ proposal_oracle_loss = F.cross_entropy(proposal_logits, proposal_oracle_target)
735
+ proposal_ranking_loss = proposal_ranking_loss + 0.5 * proposal_oracle_loss
736
+ losses["proposal_success"] = proposal_success_loss
737
+ losses["proposal_ranking"] = proposal_ranking_loss
738
+ total = (
739
+ total
740
+ + weights.proposal_success * proposal_success_loss
741
+ + weights.proposal_ranking * proposal_ranking_loss
742
+ )
743
+ else:
744
+ losses["proposal_success"] = model_output["action_mean"].new_tensor(0.0)
745
+ losses["proposal_ranking"] = model_output["action_mean"].new_tensor(0.0)
746
+
747
+ if (
748
+ "proposal_mode_logits" in model_output
749
+ and "proposal_mode_assignments" in model_output
750
+ and proposal_success_values is not None
751
+ ):
752
+ candidate_count = min(
753
+ proposal_success_values.shape[1],
754
+ proposal_utility_values.shape[1] if proposal_utility_values is not None else proposal_success_values.shape[1],
755
+ model_output["proposal_mode_assignments"].numel(),
756
+ )
757
+ proposal_success_target = proposal_success_values[:, :candidate_count].float()
758
+ proposal_utility = proposal_utility_values
759
+ if proposal_utility is None:
760
+ proposal_risk = proposal_risk_values
761
+ if proposal_risk is None:
762
+ proposal_risk = torch.clamp(
763
+ batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(),
764
+ 0.0,
765
+ 1.0,
766
+ )
767
+ proposal_utility = proposal_success_target - proposal_risk[:, :candidate_count]
768
+ else:
769
+ proposal_utility = proposal_utility[:, :candidate_count].float()
770
+ mode_success_target, mode_utility_target = _proposal_mode_targets(
771
+ model_output["proposal_mode_assignments"],
772
+ proposal_success=proposal_success_target,
773
+ proposal_utility=proposal_utility,
774
+ num_modes=model_output["proposal_mode_logits"].shape[1],
775
+ )
776
+ proposal_mode_logits = model_output["proposal_mode_logits"]
777
+ proposal_mode_success_loss = F.binary_cross_entropy_with_logits(
778
+ proposal_mode_logits,
779
+ mode_success_target,
780
+ reduction="none",
781
+ )
782
+ proposal_mode_success_loss = proposal_mode_success_loss.mean(dim=-1)
783
+ proposal_mode_pred_diff = proposal_mode_logits.unsqueeze(-1) - proposal_mode_logits.unsqueeze(-2)
784
+ proposal_mode_target_diff = mode_utility_target.unsqueeze(-1) - mode_utility_target.unsqueeze(-2)
785
+ proposal_mode_mask = proposal_mode_target_diff.abs() > 1e-4
786
+ proposal_mode_ranking_terms = torch.relu(
787
+ 0.1 - torch.sign(proposal_mode_target_diff) * proposal_mode_pred_diff
788
+ )
789
+ proposal_mode_ranking_den = proposal_mode_mask.sum(dim=(-1, -2)).clamp_min(1)
790
+ proposal_mode_ranking_loss = (proposal_mode_ranking_terms * proposal_mode_mask).sum(dim=(-1, -2)) / proposal_mode_ranking_den
791
+ proposal_mode_ranking_loss = torch.where(
792
+ proposal_mode_mask.any(dim=(-1, -2)),
793
+ proposal_mode_ranking_loss,
794
+ proposal_mode_logits.new_zeros(proposal_mode_ranking_loss.shape),
795
+ )
796
+ proposal_mode_oracle_target = mode_utility_target.argmax(dim=-1)
797
+ proposal_mode_oracle_loss = F.cross_entropy(
798
+ proposal_mode_logits,
799
+ proposal_mode_oracle_target,
800
+ reduction="none",
801
+ )
802
+ proposal_mode_loss_per_sample = (
803
+ proposal_mode_success_loss
804
+ + proposal_mode_ranking_loss
805
+ + 0.5 * proposal_mode_oracle_loss
806
+ )
807
+ task_filter = weights.proposal_mode_task_filter
808
+ if task_filter:
809
+ filtered_mask = _task_name_mask_for_values(
810
+ batch,
811
+ task_names=list(task_filter),
812
+ batch_size=proposal_mode_loss_per_sample.shape[0],
813
+ device=proposal_mode_loss_per_sample.device,
814
+ )
815
+ if filtered_mask is not None and filtered_mask.any():
816
+ proposal_mode_loss = proposal_mode_loss_per_sample[filtered_mask].mean()
817
+ else:
818
+ proposal_mode_loss = proposal_mode_logits.new_tensor(0.0)
819
+ elif weights.proposal_mode_cloth_only:
820
+ cloth_mask = _task_name_mask(
821
+ batch,
822
+ task_name="cloth",
823
+ batch_size=proposal_mode_loss_per_sample.shape[0],
824
+ device=proposal_mode_loss_per_sample.device,
825
+ )
826
+ if cloth_mask is not None and cloth_mask.any():
827
+ proposal_mode_loss = proposal_mode_loss_per_sample[cloth_mask].mean()
828
+ else:
829
+ proposal_mode_loss = proposal_mode_logits.new_tensor(0.0)
830
+ else:
831
+ proposal_mode_loss = proposal_mode_loss_per_sample.mean()
832
+ losses["proposal_mode"] = proposal_mode_loss
833
+ total = total + weights.proposal_mode * proposal_mode_loss
834
+ else:
835
+ losses["proposal_mode"] = model_output["action_mean"].new_tensor(0.0)
836
+
837
+ if "proposal_candidates" in model_output:
838
+ diversity_loss = proposal_diversity_loss(model_output["proposal_candidates"])
839
+ losses["proposal_diversity"] = diversity_loss
840
+ total = total + weights.proposal_diversity * diversity_loss
841
+ else:
842
+ losses["proposal_diversity"] = model_output["action_mean"].new_tensor(0.0)
843
+
844
+ if "equivariance_probe_action_mean" in model_output and "equivariance_target_action_mean" in model_output:
845
+ swap_loss = role_swap_consistency_loss(
846
+ model_output["equivariance_probe_action_mean"],
847
+ model_output["equivariance_target_action_mean"].detach(),
848
+ )
849
+ losses["role_swap_consistency"] = swap_loss
850
+ total = total + weights.role_swap_consistency * swap_loss
851
+ else:
852
+ losses["role_swap_consistency"] = model_output["action_mean"].new_tensor(0.0)
853
+
854
+ losses["total"] = total
855
+ return losses
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/trainer.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Sequence
5
+
6
+ import torch
7
+ from torch import Tensor, nn
8
+
9
+ from models.policy import (
10
+ AdapterWrappedPolicy,
11
+ BackboneOnlyPolicy,
12
+ ElasticRevealBimanualPolicy,
13
+ FoundationTrunkPolicy,
14
+ InteractionBimanualPolicy,
15
+ PolicyConfig,
16
+ RevealBimanualPolicy,
17
+ )
18
+ from train.losses import LossWeights, compute_total_loss
19
+
20
+
21
+ @dataclass
22
+ class TrainerConfig:
23
+ policy_type: str = "backbone_only"
24
+ training_regime: str = "adapter_train_frozen_trunk"
25
+ eval_mode: str = "adapter_active"
26
+ use_bf16: bool = True
27
+ grad_clip_norm: float = 1.0
28
+ freeze_backbone: bool = True
29
+ gradient_checkpointing: bool = True
30
+ plan_during_train: bool = True
31
+ plan_during_eval: bool = True
32
+ support_mode_conditioning: bool = True
33
+ planner_mode: str = "trainable"
34
+ use_depth: bool = True
35
+ use_world_model: bool = True
36
+ use_role_tokens: bool = True
37
+ history_steps_override: int | None = None
38
+ compute_equivariance_probe: bool = True
39
+ trainable_parameter_prefixes: tuple[str, ...] = ()
40
+ adapter_mode: str = "adapter_active"
41
+ adapter_use_transition_model: bool = True
42
+ adapter_use_task_conditioning: bool = True
43
+ adapter_action_supervision_source: str = "selected"
44
+
45
+
46
+ def build_policy(config: PolicyConfig, trainer_config: TrainerConfig) -> nn.Module:
47
+ config.backbone.freeze_backbone = trainer_config.freeze_backbone
48
+ config.backbone.gradient_checkpointing = trainer_config.gradient_checkpointing
49
+ if trainer_config.policy_type == "adapter_wrapped":
50
+ return AdapterWrappedPolicy(config)
51
+ if trainer_config.policy_type == "foundation_trunk":
52
+ return FoundationTrunkPolicy(config)
53
+ if trainer_config.policy_type == "elastic_reveal":
54
+ return ElasticRevealBimanualPolicy(config)
55
+ if trainer_config.policy_type == "reveal_state":
56
+ return RevealBimanualPolicy(config)
57
+ if trainer_config.policy_type == "interaction_state":
58
+ return InteractionBimanualPolicy(config)
59
+ return BackboneOnlyPolicy(config)
60
+
61
+
62
+ def policy_supports_planning(policy_type: str) -> bool:
63
+ return policy_type in {"reveal_state", "interaction_state", "elastic_reveal"}
64
+
65
+
66
+ def planner_enabled(trainer_config: TrainerConfig, during_eval: bool) -> bool:
67
+ if not policy_supports_planning(trainer_config.policy_type):
68
+ return False
69
+ if trainer_config.planner_mode == "off":
70
+ return False
71
+ if during_eval:
72
+ return trainer_config.plan_during_eval
73
+ return trainer_config.plan_during_train
74
+
75
+
76
+ def apply_planner_mode(model: nn.Module, trainer_config: TrainerConfig) -> list[str]:
77
+ if trainer_config.planner_mode != "proxy_pretrained":
78
+ return []
79
+ frozen_modules = []
80
+ for module_name in ("interaction_head", "world_model", "planner"):
81
+ module = getattr(model, module_name, None)
82
+ if module is None:
83
+ continue
84
+ frozen_modules.append(module_name)
85
+ for parameter in module.parameters():
86
+ parameter.requires_grad = False
87
+ return frozen_modules
88
+
89
+
90
+ def apply_trainable_parameter_prefixes(model: nn.Module, trainer_config: TrainerConfig) -> list[str]:
91
+ prefixes = tuple(str(prefix) for prefix in trainer_config.trainable_parameter_prefixes)
92
+ if not prefixes:
93
+ return []
94
+ matched = []
95
+ for name, parameter in model.named_parameters():
96
+ trainable = any(name.startswith(prefix) for prefix in prefixes)
97
+ parameter.requires_grad = trainable
98
+ if trainable:
99
+ matched.append(name)
100
+ return matched
101
+
102
+
103
+ class BimanualTrainer:
104
+ def __init__(self, model: nn.Module, optimizer: torch.optim.Optimizer, config: TrainerConfig) -> None:
105
+ self.model = model
106
+ self.optimizer = optimizer
107
+ self.config = config
108
+
109
+ def _autocast_context(self) -> torch.autocast:
110
+ if self.config.use_bf16 and torch.cuda.is_available():
111
+ return torch.autocast(device_type="cuda", dtype=torch.bfloat16)
112
+ return torch.autocast(device_type="cpu", enabled=False)
113
+
114
+ def training_step(self, batch: dict[str, Tensor | Sequence[str]], loss_weights: LossWeights | None = None) -> dict[str, Tensor]:
115
+ self.optimizer.zero_grad(set_to_none=True)
116
+ images = batch["images"]
117
+ proprio = batch["proprio"]
118
+ texts = batch.get("texts")
119
+ language_tokens = batch.get("language_tokens")
120
+ with self._autocast_context():
121
+ task_names = batch.get("task_name")
122
+ texts_value = texts if isinstance(texts, Sequence) and not isinstance(texts, str) else None
123
+ task_name_value = task_names if isinstance(task_names, Sequence) and not isinstance(task_names, str) else None
124
+ forward_kwargs = {
125
+ "images": images,
126
+ "proprio": proprio,
127
+ "texts": texts_value,
128
+ "task_names": task_name_value,
129
+ "task_ids": batch.get("task_id"),
130
+ "language_tokens": language_tokens if isinstance(language_tokens, dict) else None,
131
+ "history_images": batch.get("history_images"),
132
+ "history_proprio": batch.get("history_proprio"),
133
+ "history_actions": batch.get("history_actions"),
134
+ "depths": batch.get("depths"),
135
+ "depth_valid": batch.get("depth_valid"),
136
+ "camera_intrinsics": batch.get("camera_intrinsics"),
137
+ "camera_extrinsics": batch.get("camera_extrinsics"),
138
+ "history_depths": batch.get("history_depths"),
139
+ "history_depth_valid": batch.get("history_depth_valid"),
140
+ "history_camera_intrinsics": batch.get("history_camera_intrinsics"),
141
+ "history_camera_extrinsics": batch.get("history_camera_extrinsics"),
142
+ }
143
+ if policy_supports_planning(self.config.policy_type):
144
+ forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
145
+ forward_kwargs["support_mode_conditioning"] = self.config.support_mode_conditioning
146
+ if "candidate_action_chunks" in batch:
147
+ forward_kwargs["candidate_chunks_override"] = batch["candidate_action_chunks"]
148
+ if self.config.policy_type == "adapter_wrapped":
149
+ forward_kwargs["adapter_mode"] = self.config.adapter_mode
150
+ forward_kwargs["use_transition_model"] = self.config.adapter_use_transition_model
151
+ forward_kwargs["use_task_conditioning"] = self.config.adapter_use_task_conditioning
152
+ if self.config.policy_type == "elastic_reveal":
153
+ forward_kwargs["depths"] = batch.get("depths")
154
+ forward_kwargs["depth_valid"] = batch.get("depth_valid")
155
+ forward_kwargs["camera_intrinsics"] = batch.get("camera_intrinsics")
156
+ forward_kwargs["camera_extrinsics"] = batch.get("camera_extrinsics")
157
+ forward_kwargs["history_depths"] = batch.get("history_depths")
158
+ forward_kwargs["history_depth_valid"] = batch.get("history_depth_valid")
159
+ forward_kwargs["history_camera_intrinsics"] = batch.get("history_camera_intrinsics")
160
+ forward_kwargs["history_camera_extrinsics"] = batch.get("history_camera_extrinsics")
161
+ forward_kwargs["history_camera_valid_mask"] = batch.get("history_camera_valid_mask")
162
+ forward_kwargs["use_depth"] = self.config.use_depth
163
+ forward_kwargs["use_world_model"] = self.config.use_world_model
164
+ forward_kwargs["use_planner"] = planner_enabled(self.config, during_eval=False)
165
+ forward_kwargs["use_role_tokens"] = self.config.use_role_tokens
166
+ forward_kwargs["history_steps_override"] = self.config.history_steps_override
167
+ forward_kwargs["compute_equivariance_probe"] = self.config.compute_equivariance_probe
168
+ elif self.config.policy_type == "interaction_state":
169
+ forward_kwargs["use_role_tokens"] = self.config.use_role_tokens
170
+ forward_kwargs["history_steps_override"] = self.config.history_steps_override
171
+ model_output = self.model(**forward_kwargs)
172
+ if (
173
+ self.config.policy_type == "adapter_wrapped"
174
+ and self.config.adapter_action_supervision_source == "trunk"
175
+ and "trunk_action_mean" in model_output
176
+ ):
177
+ model_output = dict(model_output)
178
+ model_output["selected_action_mean"] = model_output["action_mean"]
179
+ model_output["action_mean"] = model_output["trunk_action_mean"]
180
+ losses = compute_total_loss(model_output, batch, weights=loss_weights)
181
+ losses["total"].backward()
182
+ torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.grad_clip_norm)
183
+ self.optimizer.step()
184
+ return losses
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_summary.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pytest
2
+
3
+ from eval.public_benchmark_package import build_public_eval_protocol, build_target_training_spec
4
+ from eval.run_public_benchmark_package import summarize_public_benchmark_package
5
+
6
+
7
+ TARGET_TRACK_IDS = [
8
+ "rlbench2_put_bottle_in_fridge",
9
+ "rlbench2_take_out_tray",
10
+ "rlbench2_take_shoes_out_of_box",
11
+ "rlbench2_lift_tray",
12
+ "rlbench2_straighten_rope",
13
+ "rlbench2_sweep_to_dustpan",
14
+ "dexgarmentlab_store_tops",
15
+ "dexgarmentlab_fold_tops",
16
+ "dexgarmentlab_hang_coat",
17
+ ]
18
+
19
+
20
+ def _successes(num_success: int, total: int = 100) -> list[int]:
21
+ return [1] * num_success + [0] * (total - num_success)
22
+
23
+
24
+ def _target_record(track_id: str, adapter_mode: str, seed: int, num_success: int, *, intervention: float, non_base: float) -> dict:
25
+ successes = _successes(num_success)
26
+ record = {
27
+ "track_id": track_id,
28
+ "adapter_mode": adapter_mode,
29
+ "successes": successes,
30
+ "success_rate": sum(successes) / len(successes),
31
+ "episodes": len(successes),
32
+ "seed": seed,
33
+ "eval_protocol": build_public_eval_protocol(track_id=track_id, eval_mode=adapter_mode, seed=seed, episodes=len(successes)),
34
+ "intervention_rate": intervention,
35
+ "non_base_selection_rate": non_base,
36
+ "steps_to_first_reveal_or_access": 8.0,
37
+ "steps_to_retrieve": 22.0,
38
+ "disturbance_proxy": 0.3,
39
+ }
40
+ if adapter_mode != "adapter_noop":
41
+ record["train_spec"] = build_target_training_spec(track_id=track_id, model_variant=adapter_mode, seed=seed)
42
+ else:
43
+ record["train_spec"] = build_target_training_spec(track_id=track_id, model_variant="adapter_active_ft", seed=seed)
44
+ return record
45
+
46
+
47
+ def _hybrid_payloads() -> list[dict]:
48
+ trunk_scores = [35, 30, 27, 28, 26, 32, 24, 22, 27]
49
+ active_scores = [75, 68, 59, 60, 58, 64, 57, 55, 62]
50
+ interventions = [0.30, 0.24, 0.19, 0.18, 0.20, 0.19, 0.22, 0.21, 0.25]
51
+ non_base = [0.40, 0.22, 0.21, 0.20, 0.19, 0.18, 0.23, 0.24, 0.26]
52
+ payloads: list[dict] = []
53
+ for index, track_id in enumerate(TARGET_TRACK_IDS):
54
+ payloads.append(
55
+ _target_record(track_id, "trunk_only_ft", 17, trunk_scores[index], intervention=0.0, non_base=0.0)
56
+ )
57
+ payloads.append(
58
+ _target_record(track_id, "adapter_noop", 17, trunk_scores[index], intervention=0.0, non_base=0.0)
59
+ )
60
+ payloads.append(
61
+ _target_record(
62
+ track_id,
63
+ "adapter_active_ft",
64
+ 17,
65
+ active_scores[index],
66
+ intervention=interventions[index],
67
+ non_base=non_base[index],
68
+ )
69
+ )
70
+ return payloads
71
+
72
+
73
+ def test_public_benchmark_package_summary_passes_with_clear_gain():
74
+ payloads = _hybrid_payloads()
75
+
76
+ summary = summarize_public_benchmark_package(payloads, bootstrap_samples=200, bootstrap_seed=0)
77
+
78
+ assert summary["headline_pass"]
79
+ assert summary["sign_of_life_pass"]
80
+ assert summary["anchor_pass"]
81
+ assert summary["sign_of_life_track_count"] == len(TARGET_TRACK_IDS)
82
+ assert summary["tracks"]["rlbench2_put_bottle_in_fridge"]["delta_active_vs_trunk"] > 0.0
83
+ assert summary["tracks"]["dexgarmentlab_hang_coat"]["delta_active_vs_trunk"] > 0.0
84
+
85
+
86
+ def test_public_benchmark_package_detects_training_mismatch():
87
+ payloads = _hybrid_payloads()
88
+ payloads[8]["train_spec"]["batch_size"] = 64
89
+
90
+ with pytest.raises(ValueError, match="Training fairness mismatch"):
91
+ summarize_public_benchmark_package(payloads, bootstrap_samples=50, bootstrap_seed=0)
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_tracks.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from eval.public_benchmark_package import (
2
+ ANCHOR_ROLE,
3
+ TARGET_ROLE,
4
+ build_public_eval_protocol,
5
+ build_target_training_spec,
6
+ default_public_benchmark_manifest,
7
+ expected_eval_modes,
8
+ public_benchmark_tracks,
9
+ public_protocol_identity_signature,
10
+ training_fairness_signature,
11
+ )
12
+
13
+
14
+ TARGET_TRACK_IDS = [
15
+ "rlbench2_put_bottle_in_fridge",
16
+ "rlbench2_take_out_tray",
17
+ "rlbench2_take_shoes_out_of_box",
18
+ "rlbench2_lift_tray",
19
+ "rlbench2_straighten_rope",
20
+ "rlbench2_sweep_to_dustpan",
21
+ "dexgarmentlab_store_tops",
22
+ "dexgarmentlab_fold_tops",
23
+ "dexgarmentlab_hang_coat",
24
+ ]
25
+
26
+
27
+ def test_public_benchmark_package_contains_expected_tracks():
28
+ manifest = default_public_benchmark_manifest()
29
+
30
+ assert manifest["package_name"] == "public_hybrid_bimanual_benchmark_v1"
31
+ assert manifest["target_track_ids"] == TARGET_TRACK_IDS
32
+ assert manifest["anchor_track_ids"] == []
33
+ assert manifest["thresholds"]["anchor_tolerance"] == 0.02
34
+
35
+
36
+ def test_public_target_protocol_identity_is_mode_invariant():
37
+ protocol_signatures = {
38
+ public_protocol_identity_signature(
39
+ build_public_eval_protocol(track_id="rlbench2_put_bottle_in_fridge", eval_mode=mode, seed=17)
40
+ )
41
+ for mode in expected_eval_modes("rlbench2_put_bottle_in_fridge")
42
+ }
43
+
44
+ assert len(protocol_signatures) == 1
45
+
46
+
47
+ def test_public_package_has_no_anchor_tracks_in_the_current_hybrid_battery():
48
+ assert public_benchmark_tracks(ANCHOR_ROLE) == []
49
+
50
+
51
+ def test_training_fairness_signature_matches_for_trunk_and_adapter():
52
+ trunk = build_target_training_spec(track_id="dexgarmentlab_hang_coat", model_variant="trunk_only_ft", seed=17)
53
+ active = build_target_training_spec(track_id="dexgarmentlab_hang_coat", model_variant="adapter_active_ft", seed=17)
54
+
55
+ assert training_fairness_signature(trunk) == training_fairness_signature(active)
56
+
57
+
58
+ def test_public_track_roles_are_partitioned():
59
+ target_roles = {track.track_id: track.role for track in public_benchmark_tracks(TARGET_ROLE)}
60
+ anchor_roles = {track.track_id: track.role for track in public_benchmark_tracks(ANCHOR_ROLE)}
61
+
62
+ assert target_roles == {track_id: TARGET_ROLE for track_id in TARGET_TRACK_IDS}
63
+ assert anchor_roles == {}
code/VLAarchtests4_root/MODEL_AND_ARTIFACT_INDEX.md ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Model And Artifact Index
2
+
3
+ ## Main Code Roots
4
+
5
+ - `code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/`
6
+ - `code/VLAarchtests2_code/VLAarchtests/tests/`
7
+
8
+ ## Main Prior Handoff Roots
9
+
10
+ - `artifacts/`
11
+ - `docs/`
12
+ - `legacy/`
13
+ - `setup/`
14
+
15
+ ## Main Current Public Benchmark Roots
16
+
17
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/`
18
+ - strongest current dense-occlusion result
19
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref_gifs/`
20
+ - side-by-side visual rerender of the final dense-occlusion held-out benchmark
21
+ - `reports/maniskill_bag_bridge_eval_less_bonus_seed17/`
22
+ - `reports/maniskill_bag_bridge_eval_less_bonus_seed23/`
23
+ - `reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json`
24
+ - `reports/maniskill_cloth_bridge_smoke_v1/`
25
+ - `reports/maniskill_cloth_bridge_val_sweep_seed23/`
26
+
27
+ ## Main Current Checkpoint Roots
28
+
29
+ - `outputs/maniskill_pickclutter_smoke_v5/`
30
+ - `outputs/maniskill_bag_bridge_smoke_v1/`
31
+ - `outputs/maniskill_cloth_bridge_smoke_v1/`
32
+
33
+ ## Main Current Dataset Roots
34
+
35
+ - `data/maniskill_pickclutter/`
36
+ - `data/maniskill_bridge_retrieval/`
37
+ - `data/reveal_proxy/`
38
+
39
+ ## Main Current Docs
40
+
41
+ - `docs/maniskill_pickclutter_correction_log_2026-04-01.md`
42
+ - `docs/public_bridge_smoke_run_log_2026-04-01.md`
43
+ - `docs/minimum_sign_of_life_maniskill_pickclutter_run_2026-04-01.md`
44
+
45
+ ## Main Current Render Code
46
+
47
+ - `code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/render_maniskill_pickclutter_benchmark_gifs.py`
48
+
49
+ ## Repo History
50
+
51
+ - `history/VLAarchtests_previous_README.md`
52
+ - `history/VLAarchtests2_previous_README.md`
53
+ - `history/VLAarchtests3_previous_README.md`
code/VLAarchtests4_root/PUBLIC_BENCHMARK_RESULTS.md ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Public Benchmark Results
2
+
3
+ All dates below refer to `2026-04-01 UTC`.
4
+
5
+ ## Dense Occluded Retrieval Proxy
6
+
7
+ Benchmark:
8
+
9
+ - ManiSkill `PickClutterYCB-v1`
10
+
11
+ ### Completed runs
12
+
13
+ - `reports/maniskill_pickclutter_smoke/public_benchmark_package_summary.json`
14
+ - `trunk = 0.04`
15
+ - `noop = 0.04`
16
+ - `active = 0.04`
17
+ - `reports/maniskill_pickclutter_smoke_v2/public_benchmark_package_summary.json`
18
+ - `trunk = 0.04`
19
+ - `noop = 0.32`
20
+ - `active = 0.32`
21
+ - not adapter-specific because `active == noop`
22
+ - `reports/maniskill_pickclutter_smoke_v3/public_benchmark_package_summary.json`
23
+ - `trunk = 0.06`
24
+ - `noop = 0.06`
25
+ - `active = 0.06`
26
+ - `reports/maniskill_pickclutter_smoke_v4/public_benchmark_package_summary.json`
27
+ - `trunk = 0.48`
28
+ - `noop = 0.04`
29
+ - `active = 0.04`
30
+ - active intervened but regressed badly
31
+ - `reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/public_benchmark_package_summary.json`
32
+ - `trunk = 0.06`
33
+ - `noop = 0.06`
34
+ - `active = 0.62`
35
+ - `delta = +0.56`
36
+ - eval-probe only, not a clean retrain
37
+ - `reports/maniskill_pickclutter_smoke_v5/public_benchmark_package_summary.json`
38
+ - `trunk = 0.04`
39
+ - `noop = 0.04`
40
+ - `active = 0.04`
41
+ - fairness-preserving retrain, but active still failed
42
+ - `reports/maniskill_pickclutter_smoke_v5_val_sweep/summary.json`
43
+ - val-only planner sweep
44
+ - `baseline_corrected = 0.00`
45
+ - `soft_pref = 0.00`
46
+ - `softer_pref = 0.625`
47
+ - `retrieve_open = 0.625`
48
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
49
+ - `trunk = 0.04`
50
+ - `noop = 0.04`
51
+ - `active = 0.62`
52
+ - `delta = +0.58`
53
+ - `95% CI = [0.44, 0.72]`
54
+ - `intervention_rate = 1.0`
55
+ - `non_base_selection_rate = 1.0`
56
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref_gifs/`
57
+ - full rerender of all `50` held-out seeds for `trunk_only_ft` and `adapter_active_ft`
58
+ - includes `index.html`, `INDEX.md`, and `manifest.json`
59
+ - rerender manifest reports `0` success mismatches against the saved benchmark json files
60
+
61
+ ### Exact `smoke_v5` eval tuning carried to held-out
62
+
63
+ - `mode_preference_bonus = 0.75`
64
+ - `premature_retrieve_penalty = 0.5`
65
+ - `premature_insert_penalty = 0.25`
66
+ - `premature_maintain_penalty = 1.0`
67
+ - `occlusion_maintain_gap_min_access = 0.30`
68
+ - `occlusion_maintain_gap_min_visibility = 0.20`
69
+ - `retrieve_stage_access_threshold = 0.18`
70
+ - `retrieve_stage_reveal_threshold = 0.18`
71
+ - `retrieve_stage_support_threshold = 0.18`
72
+
73
+ ## Bag Retrieval Proxy
74
+
75
+ Benchmark:
76
+
77
+ - ManiSkill public bridge basket retrieval proxy
78
+
79
+ ### Completed runs
80
+
81
+ - `reports/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed17.json`
82
+ - `0.32`
83
+ - `reports/maniskill_bag_bridge_smoke_v1/adapter_noop_seed17.json`
84
+ - `0.00`
85
+ - `reports/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed17.json`
86
+ - `0.48`
87
+
88
+ - `reports/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed23.json`
89
+ - `0.48`
90
+ - `reports/maniskill_bag_bridge_smoke_v1/adapter_noop_seed23.json`
91
+ - `0.08`
92
+ - `reports/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed23.json`
93
+ - `0.00`
94
+
95
+ ### Seed-23 validation sweep
96
+
97
+ - `reports/maniskill_bag_bridge_val_sweep_seed23/summary.json`
98
+
99
+ Configs:
100
+
101
+ - `default`
102
+ - `trunk = 0.125`
103
+ - `noop = 0.125`
104
+ - `active = 0.00`
105
+ - `less_bonus`
106
+ - `trunk = 0.125`
107
+ - `noop = 0.125`
108
+ - `active = 0.125`
109
+ - intervention preserved
110
+ - `conservative`
111
+ - `trunk = 0.125`
112
+ - `noop = 0.125`
113
+ - `active = 0.125`
114
+ - intervention effectively disabled
115
+ - `low_bonus_high_thresh`
116
+ - `trunk = 0.125`
117
+ - `noop = 0.125`
118
+ - `active = 0.125`
119
+ - intervention effectively disabled
120
+
121
+ ### Corrected held-out evals
122
+
123
+ - `reports/maniskill_bag_bridge_eval_less_bonus_seed17/public_benchmark_package_summary.json`
124
+ - `trunk = 0.32`
125
+ - `noop = 0.00`
126
+ - `active = 0.48`
127
+ - `delta = +0.16`
128
+ - `reports/maniskill_bag_bridge_eval_less_bonus_seed23/public_benchmark_package_summary.json`
129
+ - `trunk = 0.48`
130
+ - `noop = 0.08`
131
+ - `active = 0.48`
132
+ - `delta = +0.00`
133
+ - `reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json`
134
+ - `trunk = 0.40`
135
+ - `noop = 0.04`
136
+ - `active = 0.48`
137
+ - `delta = +0.08`
138
+ - run-bootstrap CI `[0.00, 0.16]`
139
+
140
+ ## Cloth Retrieval Proxy
141
+
142
+ Benchmark:
143
+
144
+ - ManiSkill public bridge cloth retrieval proxy
145
+
146
+ ### Completed held-out seeds
147
+
148
+ - `seed17`
149
+ - `trunk = 0.04`
150
+ - `noop = 0.04`
151
+ - `active = 0.10`
152
+ - `intervention = 0.3369`
153
+ - `non_base = 0.2674`
154
+ - `seed23`
155
+ - `trunk = 0.04`
156
+ - `noop = 0.02`
157
+ - `active = 0.02`
158
+ - `intervention = 0.0`
159
+ - `non_base = 0.0`
160
+ - `seed29`
161
+ - `trunk = 0.04`
162
+ - `noop = 0.04`
163
+ - `active = 0.04`
164
+ - `intervention = 0.0`
165
+ - `non_base = 0.0`
166
+
167
+ 3-seed aggregate:
168
+
169
+ - `trunk = 0.0400`
170
+ - `noop = 0.0333`
171
+ - `active = 0.0533`
172
+ - `delta = +0.0133`
173
+
174
+ ### Seed-23 cloth validation sweep
175
+
176
+ - `reports/maniskill_cloth_bridge_val_sweep_seed23/summary.json`
177
+
178
+ Configs:
179
+
180
+ - `default`
181
+ - `trunk = 0.25`
182
+ - `noop = 0.125`
183
+ - `active = 0.125`
184
+ - `intervention = 0.0`
185
+ - `low_thresh`
186
+ - `active = 0.125`
187
+ - `intervention = 0.2`
188
+ - `non_base = 0.0667`
189
+ - `low_thresh_less_bonus`
190
+ - `active = 0.125`
191
+ - `intervention = 0.2`
192
+ - `non_base = 0.0667`
193
+ - `very_low_thresh_less_bonus`
194
+ - `active = 0.125`
195
+ - `intervention = 1.0`
196
+ - `non_base = 0.5333`
197
+
198
+ Interpretation:
199
+
200
+ - seed23 cloth was not recoverable by eval-side planner tuning alone
201
+
202
+ ## Single-Seed Combined Proxy Suite
203
+
204
+ - `reports/public_proxy_suite_smoke_v1/combined_summary.json`
205
+
206
+ Single-seed summary:
207
+
208
+ - occlusion proxy: `+0.58`
209
+ - bag proxy: `+0.16`
210
+ - cloth proxy: `+0.06`
211
+ - macro delta: `+0.267`
212
+
213
+ This combined single-seed picture is useful historically, but the stronger current read is:
214
+
215
+ - occlusion: strong
216
+ - bag: modestly positive across corrected 2-seed evaluation
217
+ - cloth: weak/inconclusive across 3 seeds
code/VLAarchtests4_root/README.md ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - robotics
4
+ - vision-language-action
5
+ - bimanual-manipulation
6
+ - maniskill
7
+ - rlbench
8
+ - rgbd
9
+ ---
10
+
11
+ # VLAarchtests4
12
+
13
+ `VLAarchtests4` is the fresh organization repo for the RunPod work staged from `/workspace` on `2026-04-01 UTC`.
14
+
15
+ It carries forward the earlier repo lineage and adds the current public-sim benchmark package work:
16
+
17
+ - `VLAarchtests`
18
+ - early proxy + RLBench architecture search, handoff checkpoints, and environment recreation files from the `2026-03-25/26` sessions
19
+ - `VLAarchtests2`
20
+ - larger exploratory organization repo with more baselines, overlap/anchor work, frequent model changes, mixed artifacts, and several results that required later reinterpretation
21
+ - `VLAarchtests3`
22
+ - cleaned export focused on the elastic-occlusion `trunk + structured adapter + no-op fallback` refactor, validated tests, current checkpoints, and handoff docs
23
+ - `VLAarchtests4`
24
+ - keeps the `VLAarchtests3` export intact and adds the full current workspace `reports/`, `outputs/`, and `data/` trees, including all public benchmark smoke runs, checkpoint directories, dataset bundles, validation sweeps, and environment snapshots from the public-sim evaluation pass
25
+
26
+ ## What This Repo Adds
27
+
28
+ The main new addition in this repo is the public benchmark track work for the elastic-occlusion adapter:
29
+
30
+ - real public-sim smoke runs on:
31
+ - ManiSkill `PickClutterYCB-v1` as the dense occluded retrieval proxy
32
+ - ManiSkill bridge basket retrieval proxy as the bag retrieval proxy
33
+ - ManiSkill bridge cloth retrieval proxy as the folded-cloth retrieval proxy
34
+ - the public benchmark package code and summaries
35
+ - the train/eval logs, checkpoints, cached datasets, validation sweeps, and correction logs for those runs
36
+ - full visual rerenders of the final `smoke_v5_eval_tuned_softerpref` dense-occlusion benchmark for both `trunk_only_ft` and `adapter_active_ft`
37
+ - the same-machine environment snapshot for the public benchmark stack used on this RunPod
38
+
39
+ ## Top-Level Contents
40
+
41
+ - `code/`
42
+ - the cleaned code snapshot inherited from `VLAarchtests3`
43
+ - `artifacts/`
44
+ - prior staged checkpoints, proxy data, reports, and generated configs already bundled by `VLAarchtests3`
45
+ - `docs/`
46
+ - prior handoff/audit docs plus the current public benchmark run logs and correction notes
47
+ - `legacy/`
48
+ - older exact artifacts preserved by `VLAarchtests3`
49
+ - `setup/`
50
+ - prior environment files plus a new public benchmark environment snapshot under `setup/public_benchmark/`
51
+ - `history/`
52
+ - copied README history for `VLAarchtests`, `VLAarchtests2`, and `VLAarchtests3`
53
+ - `reports/`
54
+ - the full current `/workspace/workspace/reports` tree from this machine
55
+ - `outputs/`
56
+ - the full current `/workspace/workspace/outputs` tree from this machine
57
+ - `data/`
58
+ - the full current `/workspace/workspace/data` tree from this machine
59
+ - `PUBLIC_BENCHMARK_RESULTS.md`
60
+ - compact index of all public benchmark train/eval results from this session
61
+ - `MODEL_AND_ARTIFACT_INDEX.md`
62
+ - practical map of the main artifact roots to start from
63
+
64
+ ## Benchmark GIF Renders
65
+
66
+ The repo now also includes a full rendered replay of the final dense-occlusion benchmark:
67
+
68
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref_gifs/`
69
+ - `50` held-out `trunk_only_ft` gifs
70
+ - `50` held-out `adapter_active_ft` gifs
71
+ - `index.html`, `INDEX.md`, and `manifest.json` for browsing and validation
72
+ - renderer:
73
+ - `code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/render_maniskill_pickclutter_benchmark_gifs.py`
74
+
75
+ Important caveats:
76
+
77
+ - these gifs are rerendered from the saved `smoke_v5_eval_tuned_softerpref` checkpoints and exact held-out seeds, not a different benchmark run
78
+ - the rerender kept the same `softer_pref` planner override used in the reported held-out result
79
+ - the rerender manifest records `0` success mismatches versus the saved benchmark json files
80
+ - only the dense-occlusion track has this full gif export right now
81
+
82
+ ## Architecture State Carried Forward
83
+
84
+ The core model family inherited from `VLAarchtests3` is still:
85
+
86
+ - `trunk_only`
87
+ - `adapter_noop`
88
+ - `adapter_active`
89
+
90
+ The important architectural state carried into the public benchmark work is:
91
+
92
+ - wrapped-policy interface with exact `trunk_only`, `adapter_noop`, and `adapter_active` modes
93
+ - structured reveal/retrieve adapter with:
94
+ - state prediction
95
+ - task-routed proposal families
96
+ - retrieve-feasibility gating
97
+ - lightweight transition model
98
+ - planner/reranker
99
+ - planner fixes that replaced hard vetoes with softer stage penalties in:
100
+ - `code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py`
101
+
102
+ ## Public Benchmark Summary
103
+
104
+ Detailed per-run results are in `PUBLIC_BENCHMARK_RESULTS.md`. The short version is:
105
+
106
+ ### 1. Dense occluded retrieval proxy
107
+
108
+ Benchmark:
109
+
110
+ - ManiSkill `PickClutterYCB-v1`
111
+
112
+ Best current held-out result:
113
+
114
+ - directory:
115
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/`
116
+ - summary:
117
+ - `trunk_only_ft = 0.04`
118
+ - `adapter_noop = 0.04`
119
+ - `adapter_active_ft = 0.62`
120
+ - `delta_active_vs_trunk = +0.58`
121
+ - `95% CI = [0.44, 0.72]`
122
+ - `intervention_rate = 1.0`
123
+ - `non_base_selection_rate = 1.0`
124
+
125
+ Important caveat:
126
+
127
+ - this was not a new retrain after `smoke_v5`
128
+ - it used the same `smoke_v5` checkpoints with planner hyperparameters selected on the frozen validation split and then applied once to the untouched held-out split
129
+
130
+ ### 2. Bag retrieval proxy
131
+
132
+ Benchmark:
133
+
134
+ - public ManiSkill bridge basket retrieval proxy
135
+
136
+ Current fair read:
137
+
138
+ - seed `17` corrected held-out:
139
+ - `trunk = 0.32`
140
+ - `noop = 0.00`
141
+ - `active = 0.48`
142
+ - seed `23` corrected held-out:
143
+ - `trunk = 0.48`
144
+ - `noop = 0.08`
145
+ - `active = 0.48`
146
+ - corrected 2-seed aggregate:
147
+ - `trunk = 0.40`
148
+ - `noop = 0.04`
149
+ - `active = 0.48`
150
+ - `delta = +0.08`
151
+
152
+ Interpretation:
153
+
154
+ - bag remains modestly positive after using one consistent corrected planner across seeds
155
+ - the effect is smaller and less clean than the best occlusion result
156
+
157
+ ### 3. Cloth retrieval proxy
158
+
159
+ Benchmark:
160
+
161
+ - public ManiSkill bridge cloth retrieval proxy
162
+
163
+ Current read:
164
+
165
+ - seed `17`:
166
+ - `trunk = 0.04`
167
+ - `noop = 0.04`
168
+ - `active = 0.10`
169
+ - seed `23`:
170
+ - `trunk = 0.04`
171
+ - `noop = 0.02`
172
+ - `active = 0.02`
173
+ - seed `29`:
174
+ - `trunk = 0.04`
175
+ - `noop = 0.04`
176
+ - `active = 0.04`
177
+ - 3-seed aggregate:
178
+ - `trunk = 0.0400`
179
+ - `noop = 0.0333`
180
+ - `active = 0.0533`
181
+ - `delta = +0.0133`
182
+
183
+ Interpretation:
184
+
185
+ - cloth is weak and unstable
186
+ - current evidence does not support a strong cloth-specific win
187
+
188
+ ## Important Fairness Notes
189
+
190
+ The fairness story is mixed and should be stated plainly.
191
+
192
+ What is fair in the strongest public benchmark result:
193
+
194
+ - same initialization checkpoint for `trunk_only_ft` and `adapter_active_ft`
195
+ - same train/val/test split within each task
196
+ - same optimizer, LR, batch size, and unfreeze scope within each task
197
+ - `adapter_noop` is evaluated from the same adapter checkpoint as `adapter_active_ft`
198
+ - the held-out test episodes were not hand-picked after seeing outcomes
199
+
200
+ What is not fully paper-clean yet:
201
+
202
+ - most current public benchmark evidence is smoke-scale and low-seed
203
+ - the occlusion headline result depends on validation-selected planner tuning on top of a fixed checkpoint
204
+ - bag required eval-side planner correction for one seed to avoid a collapse
205
+ - cloth remains weak even after additional seeds and val sweeps
206
+
207
+ ### PickClutter Split Fairness
208
+
209
+ The important point for the dense-occlusion track is that the dataset split did not drift across the early smoke versions.
210
+
211
+ - `data/maniskill_pickclutter/smoke_v1/episode_splits.json`
212
+ - `data/maniskill_pickclutter/smoke_v2/episode_splits.json`
213
+ - `data/maniskill_pickclutter/smoke_v3/episode_splits.json`
214
+
215
+ These files contain the same episode ids:
216
+
217
+ - train: `170000..170031`
218
+ - val: `171000..171007`
219
+ - eval: `172000..172049`
220
+
221
+ Also:
222
+
223
+ - there is no `data/maniskill_pickclutter/smoke_v4/`
224
+ - there is no `data/maniskill_pickclutter/smoke_v5/`
225
+
226
+ `smoke_v4` and `smoke_v5` were code/report version labels, not new held-out episode bundles.
227
+
228
+ ### What Changed Across PickClutter Versions
229
+
230
+ The big changes across `smoke_v2`, `smoke_v3`, `smoke_v4`, and `smoke_v5` were:
231
+
232
+ - more benchmark-derived state supervision
233
+ - transition-model training enablement
234
+ - planner bug fixes
235
+ - fairness fixes so the adapter checkpoint did not hide a stronger shared trunk
236
+ - then a frozen-validation planner sweep for the final held-out eval
237
+
238
+ The big occlusion win was not caused by changing the eval episodes.
239
+
240
+ ### Dense-Occlusion Render Artifacts
241
+
242
+ The final dense-occlusion run also has a full visual export in:
243
+
244
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref_gifs/`
245
+
246
+ Those gifs show the robot interacting with the 3D scene and overlay the adapter state per frame. For `adapter_active_ft`, the overlay includes:
247
+
248
+ - adapter on/off state
249
+ - whether a non-base proposal was selected
250
+ - candidate index
251
+ - planner name
252
+ - planner score/confidence
253
+ - state signals such as visibility, access, gap, and damage
254
+
255
+ ## Crucial Caveats
256
+
257
+ ### Occlusion result was planner-tuned
258
+
259
+ The large jump in:
260
+
261
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/`
262
+
263
+ came from validation-selected planner tuning on top of the same `smoke_v5` checkpoint.
264
+
265
+ The selected override values were:
266
+
267
+ - `mode_preference_bonus = 0.75`
268
+ - `premature_retrieve_penalty = 0.5`
269
+ - `premature_insert_penalty = 0.25`
270
+ - `premature_maintain_penalty = 1.0`
271
+ - `occlusion_maintain_gap_min_access = 0.30`
272
+ - `occlusion_maintain_gap_min_visibility = 0.20`
273
+ - `retrieve_stage_access_threshold = 0.18`
274
+ - `retrieve_stage_reveal_threshold = 0.18`
275
+ - `retrieve_stage_support_threshold = 0.18`
276
+
277
+ That was a validation-only selection step. It was not a fresh retrain.
278
+
279
+ ### Bag and cloth did not use real depth
280
+
281
+ The bridge-task runner for the bag and cloth proxies used:
282
+
283
+ - one real RGB camera
284
+ - copied into all camera slots
285
+ - zero-filled depth channels
286
+
287
+ The runner labels this stack:
288
+
289
+ - `rgb_triplicate_zero_depth`
290
+
291
+ This is a real limitation and it should not be hidden.
292
+
293
+ It happened because the bridge proxy runner used a compatibility shim to satisfy the shared multi-camera tensor interface without plumbing real bridge-scene multiview depth through the stack.
294
+
295
+ Consequences:
296
+
297
+ - bag and cloth are not modality-matched to the PickClutter runs
298
+ - PickClutter used real `rgbd_3cam`
299
+ - bag and cloth used weaker perception input
300
+
301
+ ### Bag and cloth also used a different control wrapper
302
+
303
+ PickClutter:
304
+
305
+ - observation stack: `rgbd_3cam`
306
+ - action space: `bimanual_delta_pose`
307
+
308
+ Bag and cloth:
309
+
310
+ - observation stack: `rgb_triplicate_zero_depth`
311
+ - action space: `widowx_delta_pose`
312
+
313
+ So the cross-track story is architecture-consistent but not fully input/control-identical.
314
+
315
+ ### `smoke_v4_evalprobe_fromv3` is not a clean retrain result
316
+
317
+ This run:
318
+
319
+ - `reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/`
320
+
321
+ used corrected planner logic on top of `smoke_v3` weights. It is useful evidence that the active adapter can matter, but it is not a clean end-to-end retrain.
322
+
323
+ ## What Was Actually Learned
324
+
325
+ The current repo supports the following claims:
326
+
327
+ - the structured adapter is still alive
328
+ - the active branch can clearly matter on a real public dense-occlusion benchmark proxy
329
+ - `adapter_noop` remains a useful fairness control
330
+ - bag-like retrieval still shows modest positive evidence
331
+ - cloth-like retrieval is currently the weak link
332
+
333
+ It does not support the following stronger claims yet:
334
+
335
+ - broad superiority on realistic manipulation benchmarks
336
+ - stable multi-seed wins across all three target-like public proxy tracks
337
+ - a clean modality-matched comparison across occlusion, bag, and cloth
338
+
339
+ ## Environment And Setup
340
+
341
+ Two environment stories exist in this repo.
342
+
343
+ ### Prior `VLAarchtests3` / RLBench stack
344
+
345
+ Preserved under:
346
+
347
+ - `setup/ENVIRONMENT.md`
348
+ - `setup/env_vars.sh`
349
+ - `setup/rlbench_pip_freeze.txt`
350
+
351
+ This is the older RLBench / AnyBimanual oriented environment.
352
+
353
+ ### Current public benchmark stack
354
+
355
+ Preserved under:
356
+
357
+ - `setup/public_benchmark/ENVIRONMENT.md`
358
+ - `setup/public_benchmark/env_vars.sh`
359
+ - `setup/public_benchmark/python_version.txt`
360
+ - `setup/public_benchmark/uname.txt`
361
+ - `setup/public_benchmark/nvidia_smi.txt`
362
+ - `setup/public_benchmark/gpu_short.txt`
363
+ - `setup/public_benchmark/pip_freeze_python311.txt`
364
+ - `setup/public_benchmark/rlbench_env_pip_freeze.txt`
365
+ - `setup/public_benchmark/hf_env.txt`
366
+
367
+ The public benchmark runs in this session were assembled on:
368
+
369
+ - GPU: `NVIDIA L40S`
370
+ - VRAM: `46068 MiB`
371
+ - driver: `580.126.09`
372
+ - Python: `3.11.10`
373
+ - kernel: `Linux 6.8.0-88-generic`
374
+
375
+ ## Recommended Starting Points
376
+
377
+ If you want the strongest current public benchmark evidence, start here:
378
+
379
+ - `docs/maniskill_pickclutter_correction_log_2026-04-01.md`
380
+ - `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
381
+
382
+ If you want the bag/cloth public bridge follow-up, start here:
383
+
384
+ - `docs/public_bridge_smoke_run_log_2026-04-01.md`
385
+ - `reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json`
386
+ - `reports/maniskill_cloth_bridge_val_sweep_seed23/summary.json`
387
+
388
+ If you want the repo lineage context, start here:
389
+
390
+ - `history/VLAarchtests_previous_README.md`
391
+ - `history/VLAarchtests2_previous_README.md`
392
+ - `history/VLAarchtests3_previous_README.md`
393
+
394
+ ## Bottom Line
395
+
396
+ This repo is the complete organization package for the current workspace state.
397
+
398
+ It includes:
399
+
400
+ - the `VLAarchtests3` export base
401
+ - the full current machine `reports/`, `outputs/`, and `data/` trees
402
+ - the public benchmark code, datasets, checkpoints, and results
403
+ - the environment files needed to stand up the same stack on similar hardware
404
+
405
+ Use it as the archival handoff state for continuing the elastic-occlusion adapter work.
406
+
407
+ Do not cite it as if all three target-like public proxy tracks are already cleanly solved. The occlusion track is the strongest current evidence; bag is modest; cloth remains weak; and the bridge-task perception stack still needs a proper real-depth rewrite.
code/VLAarchtests4_root/docs/maniskill_pickclutter_correction_log_2026-04-01.md ADDED
@@ -0,0 +1,187 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ManiSkill PickClutter Correction Log (2026-04-01)
2
+
3
+ ## Scope
4
+
5
+ Public benchmark:
6
+
7
+ - ManiSkill 3 `PickClutterYCB-v1`
8
+
9
+ Frozen public split reused across all runs:
10
+
11
+ - train demos: `32` episodes
12
+ - val demos: `8` episodes
13
+ - eval episodes: `50`
14
+ - seed: `17`
15
+ - data bundle: `/workspace/workspace/data/maniskill_pickclutter/smoke_v3`
16
+
17
+ Fair comparison modes:
18
+
19
+ - `trunk_only_ft`
20
+ - `adapter_noop`
21
+ - `adapter_active_ft`
22
+
23
+ ## Code Changes
24
+
25
+ Runner changes:
26
+
27
+ - enabled candidate rollout supervision from real ManiSkill states
28
+ - enabled adapter transition-model training/eval
29
+ - unfroze `adapter.transition_model`
30
+ - set non-zero transition loss weight
31
+ - added ManiSkill smoke planner overrides for the occlusion proxy:
32
+ - `adapter_confidence_threshold=0.50`
33
+ - `retrieve_access_threshold=0.08`
34
+ - `retrieve_persistence_threshold=0.12`
35
+ - `retrieve_support_threshold=0.08`
36
+ - `retrieve_reocclusion_threshold=0.92`
37
+
38
+ Planner correction:
39
+
40
+ - changed adapter stage rules from hard vetoes to soft penalties in
41
+ `/workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py`
42
+
43
+ ## Runs
44
+
45
+ ### 1. `smoke_v3` corrected-train baseline
46
+
47
+ Artifacts:
48
+
49
+ - summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v3/public_benchmark_package_summary.json`
50
+
51
+ Result:
52
+
53
+ - `trunk_only_ft=0.06`
54
+ - `adapter_noop=0.06`
55
+ - `adapter_active_ft=0.06`
56
+ - `intervention_rate=0.0`
57
+ - `non_base_selection_rate=0.0`
58
+
59
+ Interpretation:
60
+
61
+ - rollout supervision and transition-model training alone were not enough
62
+ - the adapter remained inert
63
+
64
+ ### 2. `smoke_v4_evalprobe_fromv3` corrected-planner eval on `smoke_v3` weights
65
+
66
+ Artifacts:
67
+
68
+ - summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/public_benchmark_package_summary.json`
69
+
70
+ Result:
71
+
72
+ - `trunk_only_ft=0.06`
73
+ - `adapter_noop=0.06`
74
+ - `adapter_active_ft=0.62`
75
+ - `delta_active_vs_trunk=+0.56`
76
+ - `95% CI=[+0.40, +0.70]`
77
+ - `intervention_rate=1.0`
78
+ - `non_base_selection_rate=1.0`
79
+
80
+ Interpretation:
81
+
82
+ - this is the first real adapter-specific sign of life on the public benchmark
83
+ - the corrected planner logic is doing the work
84
+ - the improvement is not coming from the shared trunk, because `adapter_noop` stayed at `0.06`
85
+
86
+ ### 3. `smoke_v4` clean retrain with corrected planner active during train and eval
87
+
88
+ Artifacts:
89
+
90
+ - summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v4/public_benchmark_package_summary.json`
91
+
92
+ Result:
93
+
94
+ - `trunk_only_ft=0.48`
95
+ - `adapter_noop=0.04`
96
+ - `adapter_active_ft=0.04`
97
+ - `intervention_rate=1.0`
98
+ - `non_base_selection_rate=1.0`
99
+ - `delta_active_vs_trunk=-0.44`
100
+
101
+ Interpretation:
102
+
103
+ - the clean retrain under corrected planner logic is unstable / regressive
104
+ - the adapter-trained checkpoint collapsed even though active mode intervened
105
+ - current evidence supports the corrected planner as a real eval-time model fix, but not yet as a stable retrain recipe
106
+
107
+ ### 4. `smoke_v5` fair retrain with trunk-action supervision preserved inside adapter training
108
+
109
+ Artifacts:
110
+
111
+ - summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5/public_benchmark_package_summary.json`
112
+
113
+ Result:
114
+
115
+ - `trunk_only_ft=0.04`
116
+ - `adapter_noop=0.04`
117
+ - `adapter_active_ft=0.04`
118
+ - `intervention_rate=1.0`
119
+ - `non_base_selection_rate=1.0`
120
+ - `delta_active_vs_trunk=0.00`
121
+
122
+ Interpretation:
123
+
124
+ - this fixed the fairness problem from `smoke_v4`: the adapter-trained checkpoint no longer hid a stronger trunk, because `adapter_noop` matched `trunk_only_ft`
125
+ - but the active branch still failed because the planner collapsed to `maintain_gap` on every decision
126
+
127
+ ### 5. `smoke_v5_val_sweep` and held-out `smoke_v5_eval_tuned_softerpref`
128
+
129
+ Artifacts:
130
+
131
+ - val sweep: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_val_sweep/summary.json`
132
+ - held-out summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
133
+
134
+ Val-selected planner override:
135
+
136
+ - `mode_preference_bonus=0.75`
137
+ - `premature_retrieve_penalty=0.5`
138
+ - `premature_insert_penalty=0.25`
139
+ - `premature_maintain_penalty=1.0`
140
+ - `occlusion_maintain_gap_min_access=0.30`
141
+ - `occlusion_maintain_gap_min_visibility=0.20`
142
+ - `retrieve_stage_access_threshold=0.18`
143
+ - `retrieve_stage_reveal_threshold=0.18`
144
+ - `retrieve_stage_support_threshold=0.18`
145
+
146
+ Validation result:
147
+
148
+ - `baseline_corrected=0.00`
149
+ - `soft_pref=0.00`
150
+ - `softer_pref=0.625`
151
+ - `retrieve_open=0.625`
152
+
153
+ Held-out result:
154
+
155
+ - `trunk_only_ft=0.04`
156
+ - `adapter_noop=0.04`
157
+ - `adapter_active_ft=0.62`
158
+ - `delta_active_vs_trunk=+0.58`
159
+ - `95% CI=[+0.44, +0.72]`
160
+ - `intervention_rate=1.0`
161
+ - `non_base_selection_rate=1.0`
162
+ - `steps_to_retrieve=1.0`
163
+ - `signs_of_life=true`
164
+
165
+ Interpretation:
166
+
167
+ - this is a fair held-out public-benchmark win on the dense-occlusion proxy
168
+ - the gain is adapter-specific because `adapter_noop` stayed flat with the trunk baseline
169
+ - the fixed checkpoint from `smoke_v5` was viable; the missing piece was planner-stage calibration on the frozen validation split
170
+
171
+ ## Current Best Public-Benchmark Evidence
172
+
173
+ Best adapter-specific evidence currently available:
174
+
175
+ - `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
176
+
177
+ Why this is the strongest result:
178
+
179
+ - same frozen public train/val/eval split
180
+ - same trained trunk baseline and adapter checkpoint
181
+ - planner override selected on the frozen validation split before the held-out eval run
182
+ - `adapter_noop` isolates the shared-trunk effect and stays flat
183
+ - only `adapter_active_ft` improves, so the gain is caused by live adapter intervention
184
+
185
+ ## Open Problem
186
+
187
+ The dense-occlusion proxy now has a fair held-out win, but bag-style and cloth-style public proxy tracks are still missing. The next work item is to bring up the next public proxy benchmark instead of re-running more occlusion-only sweeps.
code/VLAarchtests4_root/docs/minimum_sign_of_life_maniskill_pickclutter_run_2026-04-01.md ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Minimum Sign-of-Life Runbook: ManiSkill PickClutterYCB-v1
2
+
3
+ Date: 2026-04-01
4
+
5
+ ## Goal
6
+
7
+ Run one real public-benchmark smoke on ManiSkill `PickClutterYCB-v1` that fairly compares:
8
+
9
+ - `trunk_only_ft`
10
+ - `adapter_noop`
11
+ - `adapter_active_ft`
12
+
13
+ The target claim for this run is narrow:
14
+
15
+ - on dense occluded retrieval, the adapter should visibly intervene and beat the trunk-only control trained on the same data;
16
+ - this is a minimum-sign-of-life run, not the full 3-track package.
17
+
18
+ ## Correction After First Failed Smoke
19
+
20
+ The first smoke run confirmed that the real ManiSkill benchmark path worked, but the adapter stayed inert because the smoke dataset only supervised proposal ranking. The corrected run keeps the same benchmark and fairness contract and restarts both trained variants from the same init on the same frozen split, but adds benchmark-derived current-state supervision for the adapter:
21
+
22
+ - `support_mode`
23
+ - `corridor_feasible`
24
+ - `persistence_horizon`
25
+ - `disturbance_cost`
26
+ - selected task metrics that feed the adapter gate and mode bias
27
+ - `state_confidence_target`
28
+
29
+ This corrected artifact set is versioned as `smoke_v2`.
30
+
31
+ ## Fixed Protocol
32
+
33
+ ### Benchmark
34
+
35
+ - Public benchmark: ManiSkill `PickClutterYCB-v1`
36
+ - Track id: `occlusion_track`
37
+ - Task family label inside the adapter stack: `foliage`
38
+ - Resolution: `224`
39
+ - One seed for training: `17`
40
+
41
+ ### Data
42
+
43
+ - One benchmark-native scripted macro teacher collects the dataset once.
44
+ - The teacher also writes per-step scene-state labels from the same public benchmark state and candidate sweep.
45
+ - Train demos: `32`
46
+ - Val demos: `8`
47
+ - Held-out eval episodes: `50`
48
+ - The exact train, val, and eval episode ids/seeds are frozen once generated and written to disk.
49
+
50
+ ### Fairness Constraints
51
+
52
+ - Both trained conditions start from the same initialization checkpoint.
53
+ - Both use the same train/val split.
54
+ - Both use the same held-out eval episodes.
55
+ - Both use the same optimizer, batch size, LR, epoch budget, and random seed.
56
+ - Both fine-tune the same trunk submodules:
57
+ - fusion
58
+ - memory
59
+ - decoder
60
+ - The backbone stays frozen for both conditions.
61
+ - The only architectural difference is the presence of the structured adapter.
62
+ - `candidate0` remains the raw trunk action.
63
+ - `adapter_noop` is eval-only and is produced from the `adapter_active_ft` checkpoint.
64
+
65
+ ### Initialization
66
+
67
+ - Shared init checkpoint:
68
+ - `/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt`
69
+ - Load with shape filtering / `init_strict: false`.
70
+
71
+ ### Training Conditions
72
+
73
+ #### `trunk_only_ft`
74
+
75
+ - Policy type: `foundation_trunk`
76
+ - Trainable prefixes:
77
+ - `fusion`
78
+ - `memory`
79
+ - `decoder`
80
+
81
+ #### `adapter_active_ft`
82
+
83
+ - Policy type: `adapter_wrapped`
84
+ - Trainable prefixes:
85
+ - `trunk.fusion`
86
+ - `trunk.memory`
87
+ - `trunk.decoder`
88
+ - `adapter.state_head`
89
+ - `adapter.proposal_prior`
90
+ - `adapter.planner`
91
+ - Transition model: off for the smoke run
92
+
93
+ ### Evaluation Conditions
94
+
95
+ - `trunk_only_ft`: trained trunk checkpoint
96
+ - `adapter_noop`: adapter checkpoint in no-op mode
97
+ - `adapter_active_ft`: adapter checkpoint in active mode
98
+
99
+ ### Success Criteria
100
+
101
+ - Adapter success exceeds trunk-only success by at least `5` absolute points on the held-out `50` episodes.
102
+ - Adapter intervention rate is at least `15%`.
103
+ - Non-base selection rate is at least `15%`.
104
+ - Results are summarized through the public benchmark package reporter under the `occlusion_track`.
105
+
106
+ ## Planned Artifacts
107
+
108
+ ### Data
109
+
110
+ - `/workspace/workspace/data/maniskill_pickclutter/smoke_v2/train.pt`
111
+ - `/workspace/workspace/data/maniskill_pickclutter/smoke_v2/val.pt`
112
+ - `/workspace/workspace/data/maniskill_pickclutter/smoke_v2/episode_splits.json`
113
+
114
+ ### Train Outputs
115
+
116
+ - `/workspace/workspace/outputs/maniskill_pickclutter_smoke_v2/trunk_only_ft_seed17/`
117
+ - `/workspace/workspace/outputs/maniskill_pickclutter_smoke_v2/adapter_active_ft_seed17/`
118
+
119
+ ### Eval Outputs
120
+
121
+ - `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/trunk_only_ft_seed17.json`
122
+ - `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/adapter_noop_seed17.json`
123
+ - `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/adapter_active_ft_seed17.json`
124
+
125
+ ### Package Summary
126
+
127
+ - `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/public_benchmark_package_summary.json`
128
+ - `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/public_benchmark_package_summary.md`
129
+
130
+ ## Notes
131
+
132
+ - This run is intentionally limited to the fastest credible public target track.
133
+ - No custom benchmark or custom teleop suite is allowed for this smoke.
134
+ - If the ManiSkill runtime or macro controller requires repairs, the repairs must preserve the fairness constraints above.
code/VLAarchtests4_root/docs/public_benchmark_progress_2026-04-01.md ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## Public Benchmark Progress
2
+
3
+ Date: 2026-04-01 UTC
4
+
5
+ ### Confirmed Real Public Benchmark Result
6
+
7
+ - Public occlusion proxy: `ManiSkill PickClutterYCB-v1`
8
+ - Strongest adapter-specific result so far:
9
+ - summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
10
+ - `trunk_only_ft = 0.04`
11
+ - `adapter_noop = 0.04`
12
+ - `adapter_active_ft = 0.62`
13
+ - `delta_active_vs_trunk = +0.58`
14
+ - `95% CI = [+0.44, +0.72]`
15
+ - `intervention_rate = 1.0`
16
+ - `non_base_selection_rate = 1.0`
17
+ - Interpretation:
18
+ - this is real adapter-specific sign of life on a public occlusion benchmark
19
+ - the gain is not coming from a stronger shared trunk, because `adapter_noop` stays flat
20
+
21
+ ### BEHAVIOR Bag Proxy Investigation
22
+
23
+ Target public task family:
24
+ - official BEHAVIOR grocery-store bag/container retrieval proxy
25
+ - primary candidate: `paying_for_purchases`
26
+ - stricter but currently unusable candidate: `buy_basic_garden_tools`
27
+
28
+ Environment used:
29
+ - BEHAVIOR assets: `/workspace/workspace/BEHAVIOR-1K`
30
+ - venv used for probes: `/workspace/envs/behavior`
31
+
32
+ Findings:
33
+ - `buy_basic_garden_tools` is blocked by official scene-task geometry:
34
+ - repeated failure on `ontop ['rake.n.03_1', 'grocery_shelf.n.01_1']`
35
+ - even with whitelist attempts, the sampler never found a valid shelf placement
36
+ - `paying_for_purchases` is much healthier:
37
+ - `grocery_store_convenience`, `grocery_store_cafe`, and `grocery_store_asian` all load
38
+ - object scope binds the real task objects:
39
+ - `shopping_basket.n.01_1`
40
+ - `money.n.01_1`
41
+ - `checkout.n.03_1`
42
+ - `floor.n.01_1`
43
+ - Root sampler bug:
44
+ - official online sampling fails on the floor / agent chain
45
+ - without patching, the blocking warning is:
46
+ - `Room type [grocery_store] ... floor.n.01_1: , checkout.n.03_1: grocery_store_0`
47
+ - after removing the agent-on-floor condition from the sampler pipeline, the next blocker is:
48
+ - `ontop ['shopping_basket.n.01_1', 'floor.n.01_1'] False`
49
+ - Critical state-probe result:
50
+ - even when object bindings exist, the sampled movable objects remain parked at their far-away import positions
51
+ - observed example on `grocery_store_asian`:
52
+ - basket position near `[120, 120, -80]`
53
+ - money position near `[115, 115, -85]`
54
+ - apples position near `[110, 110, -90]` and `[105, 105, -95]`
55
+ - `money inside basket = False`
56
+ - `apple1 inside basket = False`
57
+ - `apple2 inside basket = False`
58
+ - Conclusion:
59
+ - as of 2026-04-01, the BEHAVIOR bag proxy is not yet a usable fair evaluation track in this workspace
60
+ - the public task objects bind, but the online sampler does not materialize a valid initial scene for training or evaluation
61
+
62
+ ### Garment / Cloth Proxy Status
63
+
64
+ - GarmentLab repo cloned:
65
+ - `/workspace/workspace/GarmentLab`
66
+ - Immediate constraint:
67
+ - the repo expects Isaac Sim 4.0.0 plus external Google Drive assets
68
+ - Current status:
69
+ - code inspected only
70
+ - no runnable public cloth benchmark execution completed yet in this workspace
71
+
72
+ ### Next Public Proxy Candidates
73
+
74
+ Given the BEHAVIOR blocker, the next-lightest public candidates already available locally are:
75
+
76
+ - `OpenCabinetDrawer-v1`
77
+ - public ManiSkill task
78
+ - good container reveal / access proxy
79
+ - `PutEggplantInBasketScene-v1`
80
+ - public ManiSkill bridge-dataset task
81
+ - public basket / container interaction proxy
82
+ - `PutSpoonOnTableClothInScene-v1`
83
+ - public ManiSkill bridge-dataset cloth interaction proxy
84
+
85
+ ### Immediate Recommendation
86
+
87
+ - Keep the confirmed `PickClutterYCB-v1` result as the anchor public success case.
88
+ - Do not spend more time on BEHAVIOR online sampling until either:
89
+ - a cached valid scene instance is created, or
90
+ - the sampler is patched deeply enough to place container objects correctly instead of leaving them at far-away import positions.
91
+ - Pivot the next train/eval smoke to a lighter public ManiSkill proxy before returning to BEHAVIOR.
code/VLAarchtests4_root/docs/public_bridge_smoke_run_log_2026-04-01.md ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Public Bridge Smoke Run Log
2
+
3
+ Date: 2026-04-01 UTC
4
+
5
+ ## Completed public proxy evidence
6
+
7
+ - Occlusion proxy already completed earlier on `PickClutterYCB-v1`.
8
+ - Best current occlusion report:
9
+ - `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
10
+ - `trunk_only_ft=0.04`
11
+ - `adapter_noop=0.04`
12
+ - `adapter_active_ft=0.62`
13
+ - `delta_active_vs_trunk=+0.58`
14
+ - `95% CI = [+0.44, +0.72]`
15
+ - `intervention_rate=1.0`
16
+ - `non_base_selection_rate=1.0`
17
+
18
+ - Bag proxy completed on the public ManiSkill bridge basket scene proxy.
19
+ - Bag report directory:
20
+ - `/workspace/workspace/reports/maniskill_bag_bridge_smoke_v1`
21
+ - Bag result summary:
22
+ - `trunk_only_ft=0.32`
23
+ - `adapter_noop=0.00`
24
+ - `adapter_active_ft=0.48`
25
+ - `delta_active_vs_trunk=+0.16`
26
+ - `delta_active_vs_trunk_ci95=[-0.04, 0.34]`
27
+ - `intervention_rate=1.0`
28
+ - `non_base_selection_rate=1.0`
29
+ - bag track `signs_of_life=true`
30
+ - package-level headline gate remains false at this single-seed smoke scale
31
+
32
+ ## Cloth proxy definition
33
+
34
+ - Public scene proxy:
35
+ - `PutSpoonOnTableClothInScene-v1`
36
+ - Fixed hidden-state initialization:
37
+ - spoon pose `[-0.235, -0.094, 0.8748]`
38
+ - cloth pose `[-0.235, -0.075, 0.885]`
39
+ - Deterministic valid-seed filter:
40
+ - accept only seeds whose initialized hidden state is below the visibility gate and that are solvable by scripted reveal+retrieve
41
+ - Reveal macros corrected to push-style actions:
42
+ - `lift_edge` = front push in `+y`
43
+ - `separate_layer` = side push in `+x`
44
+ - Cloth success metric corrected:
45
+ - based on spoon displacement from its own hidden start plus visibility
46
+ - no longer credits success merely because the cloth flies away
47
+
48
+ ## Important runner fixes already landed
49
+
50
+ - File:
51
+ - `/workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py`
52
+ - Fixed:
53
+ - cloth hidden-state initialization
54
+ - cloth seed filtering and split reuse via `episode_splits.json`
55
+ - `post_bundle` missing in cloth collect success check
56
+ - bridge smoke loss weights aligned to current `LossWeights`
57
+ - adapter trainable parameter prefixes aligned to working pickclutter runner
58
+ - zero-depth layout changed to channel-first
59
+ - cached dataset normalizer added for old channel-last depth tensors
60
+
61
+ ## Live status when this note was written
62
+
63
+ - Bag process is complete.
64
+ - Cloth process is still collecting the train split in the original long-running session.
65
+ - The long-running cloth process was started before the later loss-weight and depth-layout fixes, so it is expected to finish collection and then crash at training start.
66
+ - After it writes `train.pt` and `val.pt`, restart cloth with:
67
+
68
+ ```bash
69
+ python /workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py --task cloth --skip-collection
70
+ ```
71
+
72
+ - If trunk checkpoint already exists by that point and only adapter needs rerun:
73
+
74
+ ```bash
75
+ python /workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py --task cloth --skip-collection --reuse-checkpoints
76
+ ```
77
+
78
+ ## Cloth restart correction
79
+
80
+ - The corrected cloth restart reached adapter training and failed in rollout supervision because the cached cloth public proxy authored `7` candidate targets while the decoder always allocates `8` proposal slots.
81
+ - Fix landed in:
82
+ - `/workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py`
83
+ - Correction:
84
+ - cached bridge samples now normalize channel-last depth tensors as before
85
+ - cached candidate-aligned tensors now also pad from `7 -> 8` slots before loading
86
+ - padding cycles the non-base candidates first, which preserves the collected cloth episodes and avoids recollection
87
+ - Verified locally before restart:
88
+ - normalized cloth `candidate_action_chunks` is `(8, 8, 14)`
89
+ - normalized cloth `candidate_rollout_support_mode` is `(8, 5)`
90
+ - one real `adapter_active_ft` training step and one real validation loss pass both completed without the previous shape error
91
+
92
+ ## Cloth result
93
+
94
+ - Report directory:
95
+ - `/workspace/workspace/reports/maniskill_cloth_bridge_smoke_v1`
96
+ - Final cloth smoke summary:
97
+ - `trunk_only_ft = 0.04`
98
+ - `adapter_noop = 0.04`
99
+ - `adapter_active_ft = 0.10`
100
+ - `delta_active_vs_trunk = +0.06`
101
+ - `delta_active_vs_trunk_ci95 = [-0.04, 0.16]`
102
+ - `intervention_rate = 0.3369`
103
+ - `non_base_selection_rate = 0.2674`
104
+ - Interpretation:
105
+ - cloth proxy is positive and adapter-specific in this single-seed smoke because `adapter_noop` stayed flat while `adapter_active_ft` improved
106
+ - effect size is modest and not yet statistically clean in this smoke protocol
107
+
108
+ ## Combined three-track proxy suite
109
+
110
+ - Combined report:
111
+ - `/workspace/workspace/reports/public_proxy_suite_smoke_v1/combined_summary.json`
112
+ - `/workspace/workspace/reports/public_proxy_suite_smoke_v1/combined_summary.md`
113
+ - Current three-track smoke evidence:
114
+ - occlusion proxy positive and adapter-specific
115
+ - bag proxy positive and adapter-specific
116
+ - cloth proxy positive and adapter-specific
code/VLAarchtests4_root/setup/public_benchmark/ENVIRONMENT.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Public Benchmark Environment Manifest
2
+
3
+ This file describes the environment used for the current ManiSkill public benchmark runs staged in `VLAarchtests4`.
4
+
5
+ ## Hardware
6
+
7
+ - GPU: `NVIDIA L40S`
8
+ - VRAM: `46068 MiB`
9
+ - Driver: `580.126.09`
10
+ - Kernel: `Linux 6.8.0-88-generic`
11
+ - Python: `3.11.10`
12
+
13
+ Raw snapshots:
14
+
15
+ - `setup/public_benchmark/nvidia_smi.txt`
16
+ - `setup/public_benchmark/gpu_short.txt`
17
+ - `setup/public_benchmark/uname.txt`
18
+ - `setup/public_benchmark/python_version.txt`
19
+
20
+ ## Python Packages
21
+
22
+ Package snapshots:
23
+
24
+ - current Python 3.11 env:
25
+ - `setup/public_benchmark/pip_freeze_python311.txt`
26
+ - prior RLBench env snapshot preserved for continuity:
27
+ - `setup/public_benchmark/rlbench_env_pip_freeze.txt`
28
+
29
+ Hugging Face CLI environment:
30
+
31
+ - `setup/public_benchmark/hf_env.txt`
32
+
33
+ ## Runtime Variables For The Public Benchmark Stack
34
+
35
+ The ManiSkill public benchmark code paths set runtime variables equivalent to:
36
+
37
+ ```bash
38
+ export VK_ICD_FILENAMES=/workspace/runtime/vulkan/icd.d/nvidia_icd_egl.json
39
+ export VK_LAYER_PATH=/workspace/runtime/vulkan/implicit_layer.d
40
+ export XDG_RUNTIME_DIR=/tmp/runtime-root
41
+ export MS_ASSET_DIR=/workspace/data/maniskill
42
+ ```
43
+
44
+ The local project code path still needs:
45
+
46
+ ```bash
47
+ export PYTHONPATH=/workspace/workspace/VLAarchtests4_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual:${PYTHONPATH:-}
48
+ ```
49
+
50
+ Convenience helper:
51
+
52
+ - `setup/public_benchmark/env_vars.sh`
53
+
54
+ ## Important Distinction From Older RLBench Stack
55
+
56
+ The older `VLAarchtests3` environment files under `setup/` are still preserved and remain relevant for:
57
+
58
+ - AnyBimanual
59
+ - RLBench
60
+ - CoppeliaSim / PyRep
61
+
62
+ The current ManiSkill public-benchmark runs were primarily executed from the Python 3.11 environment captured here, not from the older Python 3.10 RLBench stack described in `setup/ENVIRONMENT.md`.
63
+
64
+ ## Caveats
65
+
66
+ - PickClutter used real `rgbd_3cam`.
67
+ - The bridge bag/cloth proxy runner used a compatibility shim that duplicated one RGB view across all camera slots and zero-filled depth.
68
+ - Reproducing those exact bridge results requires keeping that current runner behavior unchanged.
code/VLAarchtests4_root/setup/public_benchmark/env_vars.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ export VK_ICD_FILENAMES="${VK_ICD_FILENAMES:-/workspace/runtime/vulkan/icd.d/nvidia_icd_egl.json}"
5
+ export VK_LAYER_PATH="${VK_LAYER_PATH:-/workspace/runtime/vulkan/implicit_layer.d}"
6
+ export XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/tmp/runtime-root}"
7
+ export MS_ASSET_DIR="${MS_ASSET_DIR:-/workspace/data/maniskill}"
8
+
9
+ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
10
+ export PYTHONPATH="${ROOT_DIR}/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual:${PYTHONPATH:-}"
code/VLAarchtests4_root/setup/public_benchmark/gpu_short.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ NVIDIA L40S, 46068 MiB, 580.126.09
code/VLAarchtests4_root/setup/public_benchmark/hf_env.txt ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Copy-and-paste the text below in your GitHub issue.
3
+
4
+ - huggingface_hub version: 1.8.0
5
+ - Platform: Linux-6.8.0-88-generic-x86_64-with-glibc2.35
6
+ - Python version: 3.11.10
7
+ - Running in iPython ?: No
8
+ - Running in notebook ?: No
9
+ - Running in Google Colab ?: No
10
+ - Running in Google Colab Enterprise ?: No
11
+ - Token path ?: /workspace/.cache/huggingface/token
12
+ - Has saved token ?: True
13
+ - Who am I ?: lsnu
14
+ - Configured git credential helpers:
15
+ - Installation method: hf_installer
16
+ - httpx: 0.28.1
17
+ - hf_xet: 1.4.2
18
+ - gradio: N/A
19
+ - tensorboard: N/A
20
+ - ENDPOINT: https://huggingface.co
21
+ - HF_HUB_CACHE: /workspace/.cache/huggingface/hub
22
+ - HF_ASSETS_CACHE: /workspace/.cache/huggingface/assets
23
+ - HF_TOKEN_PATH: /workspace/.cache/huggingface/token
24
+ - HF_STORED_TOKENS_PATH: /workspace/.cache/huggingface/stored_tokens
25
+ - HF_HUB_OFFLINE: False
26
+ - HF_HUB_DISABLE_TELEMETRY: False
27
+ - HF_HUB_DISABLE_PROGRESS_BARS: None
28
+ - HF_HUB_DISABLE_SYMLINKS_WARNING: False
29
+ - HF_HUB_DISABLE_EXPERIMENTAL_WARNING: False
30
+ - HF_HUB_DISABLE_IMPLICIT_TOKEN: False
31
+ - HF_HUB_DISABLE_XET: False
32
+ - HF_HUB_ETAG_TIMEOUT: 10
33
+ - HF_HUB_DOWNLOAD_TIMEOUT: 10
34
+ - HF_XET_HIGH_PERFORMANCE: False
35
+
code/VLAarchtests4_root/setup/public_benchmark/nvidia_smi.txt ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Wed Apr 1 13:02:04 2026
2
+ +-----------------------------------------------------------------------------------------+
3
+ | NVIDIA-SMI 580.126.09 Driver Version: 580.126.09 CUDA Version: 13.0 |
4
+ +-----------------------------------------+------------------------+----------------------+
5
+ | GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
6
+ | Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
7
+ | | | MIG M. |
8
+ |=========================================+========================+======================|
9
+ | 0 NVIDIA L40S On | 00000000:05:00.0 Off | 0 |
10
+ | N/A 31C P8 34W / 350W | 54MiB / 46068MiB | 0% Default |
11
+ | | | N/A |
12
+ +-----------------------------------------+------------------------+----------------------+
13
+
14
+ +-----------------------------------------------------------------------------------------+
15
+ | Processes: |
16
+ | GPU GI CI PID Type Process name GPU Memory |
17
+ | ID ID Usage |
18
+ |=========================================================================================|
19
+ | 0 N/A N/A 76839 G /usr/lib/xorg/Xorg 37MiB |
20
+ | 0 N/A N/A 76839 G /usr/lib/xorg/Xorg 37MiB |
21
+ +-----------------------------------------------------------------------------------------+
code/VLAarchtests4_root/setup/public_benchmark/pip_freeze_python311.txt ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.4.0
2
+ accelerate==1.13.0
3
+ annotated-doc==0.0.4
4
+ antlr4-python3-runtime==4.9.3
5
+ anyio==4.6.0
6
+ argon2-cffi==23.1.0
7
+ argon2-cffi-bindings==21.2.0
8
+ arm_pytorch_utilities==0.5.0
9
+ arrow==1.3.0
10
+ asttokens==2.4.1
11
+ async-lru==2.0.4
12
+ attrs==24.2.0
13
+ babel==2.16.0
14
+ beautifulsoup4==4.12.3
15
+ bleach==6.1.0
16
+ blinker==1.4
17
+ certifi==2024.8.30
18
+ cffi==1.17.1
19
+ charset-normalizer==3.3.2
20
+ click==8.3.1
21
+ cloudpickle==3.1.2
22
+ comm==0.2.2
23
+ contourpy==1.3.3
24
+ cryptography==3.4.8
25
+ cycler==0.12.1
26
+ dacite==1.9.2
27
+ dbus-python==1.2.18
28
+ debugpy==1.8.5
29
+ decorator==5.1.1
30
+ defusedxml==0.7.1
31
+ distro==1.7.0
32
+ docstring_parser==0.17.0
33
+ einops==0.8.2
34
+ entrypoints==0.4
35
+ executing==2.1.0
36
+ Farama-Notifications==0.0.4
37
+ fast_kinematics==0.2.2
38
+ fastjsonschema==2.20.0
39
+ filelock==3.13.1
40
+ fonttools==4.62.1
41
+ fqdn==1.5.1
42
+ fsspec==2024.2.0
43
+ gitdb==4.0.12
44
+ GitPython==3.1.46
45
+ grpcio==1.80.0
46
+ gymnasium==0.29.1
47
+ h11==0.14.0
48
+ h5py==3.16.0
49
+ hf-xet==1.4.3
50
+ httpcore==1.0.5
51
+ httplib2==0.20.2
52
+ httpx==0.27.2
53
+ huggingface_hub==1.8.0
54
+ hydra-core==1.3.2
55
+ idna==3.10
56
+ ImageIO==2.37.3
57
+ imageio-ffmpeg==0.6.0
58
+ importlib-metadata==4.6.4
59
+ importlib_resources==6.5.2
60
+ ipykernel==6.29.5
61
+ ipython==8.27.0
62
+ ipython-genutils==0.2.0
63
+ ipywidgets==8.1.5
64
+ isoduration==20.11.0
65
+ jedi==0.19.1
66
+ jeepney==0.7.1
67
+ Jinja2==3.1.3
68
+ json5==0.9.25
69
+ jsonpointer==3.0.0
70
+ jsonschema==4.23.0
71
+ jsonschema-specifications==2023.12.1
72
+ jupyter-archive==3.4.0
73
+ jupyter-events==0.10.0
74
+ jupyter-highlight-selected-word==0.2.0
75
+ jupyter-lsp==2.2.5
76
+ jupyter_client==7.4.9
77
+ jupyter_contrib_core==0.4.2
78
+ jupyter_contrib_nbextensions==0.7.0
79
+ jupyter_core==5.7.2
80
+ jupyter_nbextensions_configurator==0.6.4
81
+ jupyter_server==2.14.2
82
+ jupyter_server_terminals==0.5.3
83
+ jupyterlab==4.2.5
84
+ jupyterlab_pygments==0.3.0
85
+ jupyterlab_server==2.27.3
86
+ jupyterlab_widgets==3.0.13
87
+ keyring==23.5.0
88
+ kiwisolver==1.5.0
89
+ launchpadlib==1.10.16
90
+ lazr.restfulclient==0.14.4
91
+ lazr.uri==1.0.6
92
+ lxml==5.3.0
93
+ mani_skill==3.0.0b22
94
+ Markdown==3.10.2
95
+ markdown-it-py==4.0.0
96
+ MarkupSafe==2.1.5
97
+ matplotlib==3.10.8
98
+ matplotlib-inline==0.1.7
99
+ mdurl==0.1.2
100
+ mistune==3.0.2
101
+ more-itertools==8.10.0
102
+ mplib==0.1.1
103
+ mpmath==1.3.0
104
+ nbclassic==1.1.0
105
+ nbclient==0.10.0
106
+ nbconvert==7.16.4
107
+ nbformat==5.10.4
108
+ nest-asyncio==1.6.0
109
+ networkx==3.2.1
110
+ notebook==6.5.5
111
+ notebook_shim==0.2.4
112
+ numpy==2.4.4
113
+ nvidia-cublas-cu12==12.4.2.65
114
+ nvidia-cuda-cupti-cu12==12.4.99
115
+ nvidia-cuda-nvrtc-cu12==12.4.99
116
+ nvidia-cuda-runtime-cu12==12.4.99
117
+ nvidia-cudnn-cu12==9.1.0.70
118
+ nvidia-cufft-cu12==11.2.0.44
119
+ nvidia-curand-cu12==10.3.5.119
120
+ nvidia-cusolver-cu12==11.6.0.99
121
+ nvidia-cusparse-cu12==12.3.0.142
122
+ nvidia-ml-py==13.595.45
123
+ nvidia-nccl-cu12==2.20.5
124
+ nvidia-nvjitlink-cu12==12.4.99
125
+ nvidia-nvtx-cu12==12.4.99
126
+ oauthlib==3.2.0
127
+ omegaconf==2.3.0
128
+ opencv-python==4.13.0.92
129
+ overrides==7.7.0
130
+ packaging==24.1
131
+ pandas==3.0.2
132
+ pandocfilters==1.5.1
133
+ parso==0.8.4
134
+ pexpect==4.9.0
135
+ pillow==10.2.0
136
+ platformdirs==4.3.6
137
+ prometheus_client==0.21.0
138
+ prompt_toolkit==3.0.47
139
+ protobuf==7.34.1
140
+ psutil==6.0.0
141
+ ptyprocess==0.7.0
142
+ pure_eval==0.2.3
143
+ py-spy==0.4.1
144
+ pycparser==2.22
145
+ Pygments==2.18.0
146
+ PyGObject==3.42.1
147
+ PyJWT==2.3.0
148
+ pyparsing==3.3.2
149
+ pyperclip==1.11.0
150
+ python-apt==2.4.0+ubuntu4
151
+ python-dateutil==2.9.0.post0
152
+ python-json-logger==2.0.7
153
+ pytorch-kinematics==0.7.6
154
+ pytorch-seed==0.2.0
155
+ PyYAML==6.0.2
156
+ pyzmq==24.0.1
157
+ referencing==0.35.1
158
+ regex==2026.3.32
159
+ requests==2.32.3
160
+ rfc3339-validator==0.1.4
161
+ rfc3986-validator==0.1.1
162
+ rich==14.3.3
163
+ rpds-py==0.20.0
164
+ safetensors==0.7.0
165
+ sapien==3.0.3
166
+ scipy==1.17.1
167
+ SecretStorage==3.3.1
168
+ Send2Trash==1.8.3
169
+ shellingham==1.5.4
170
+ six==1.16.0
171
+ smmap==5.0.3
172
+ sniffio==1.3.1
173
+ soupsieve==2.6
174
+ stack-data==0.6.3
175
+ sympy==1.12
176
+ systemd-python==234
177
+ tabulate==0.10.0
178
+ tensorboard==2.20.0
179
+ tensorboard-data-server==0.7.2
180
+ terminado==0.18.1
181
+ timm==1.0.26
182
+ tinycss2==1.3.0
183
+ tokenizers==0.22.2
184
+ toppra==0.6.3
185
+ torch==2.4.1+cu124
186
+ torchaudio==2.4.1+cu124
187
+ torchvision==0.19.1+cu124
188
+ tornado==6.4.1
189
+ tqdm==4.67.3
190
+ traitlets==5.14.3
191
+ transformers==5.4.0
192
+ transforms3d==0.4.2
193
+ trimesh==4.11.5
194
+ triton==3.0.0
195
+ typeguard==4.5.1
196
+ typer==0.24.1
197
+ types-python-dateutil==2.9.0.20240906
198
+ typing_extensions==4.15.0
199
+ tyro==1.0.11
200
+ uri-template==1.3.0
201
+ urllib3==2.2.3
202
+ wadllib==1.3.6
203
+ wcwidth==0.2.13
204
+ webcolors==24.8.0
205
+ webencodings==0.5.1
206
+ websocket-client==1.8.0
207
+ Werkzeug==3.1.7
208
+ widgetsnbextension==4.0.13
209
+ zipp==1.0.0
code/VLAarchtests4_root/setup/public_benchmark/python_version.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Python 3.11.10
code/VLAarchtests4_root/setup/public_benchmark/rlbench_env_pip_freeze.txt ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ accelerate==0.31.0
3
+ addict==2.4.0
4
+ aiohappyeyeballs==2.6.1
5
+ aiohttp==3.13.5
6
+ aiosignal==1.4.0
7
+ antlr4-python3-runtime==4.9.3
8
+ appdirs==1.4.4
9
+ asttokens==3.0.1
10
+ async-timeout==5.0.1
11
+ attrs==26.1.0
12
+ backports.zstd @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_backports.zstd_1767044984/work
13
+ blinker==1.9.0
14
+ blosc==1.11.4
15
+ Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1764016952863/work
16
+ cached-property @ file:///home/conda/feedstock_root/build_artifacts/cached_property_1615209429212/work
17
+ certifi @ file:///home/conda/feedstock_root/build_artifacts/certifi_1772001073725/work/certifi
18
+ cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1761202865726/work
19
+ charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1773659966602/work
20
+ click==8.3.1
21
+ click-prompt==0.5.1
22
+ clip @ git+https://github.com/openai/CLIP.git@d05afc436d78f1c48dc0dbf8e5980a9d471f35f6
23
+ cloudpickle==3.1.2
24
+ comm==0.2.3
25
+ ConfigArgParse==1.7.5
26
+ contourpy @ file:///home/conda/feedstock_root/build_artifacts/contourpy_1744743067588/work
27
+ cycler @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_cycler_1764466758/work
28
+ dash==4.1.0
29
+ decorator==5.2.1
30
+ docker-pycreds==0.4.0
31
+ einops==0.8.0
32
+ exceptiongroup==1.3.1
33
+ executing==2.2.1
34
+ Farama-Notifications==0.0.4
35
+ fastjsonschema==2.21.2
36
+ filelock @ file:///home/conda/feedstock_root/build_artifacts/filelock_1773313889543/work
37
+ Flask==3.1.3
38
+ fonttools @ file:///home/conda/feedstock_root/build_artifacts/fonttools_1773137064424/work
39
+ freetype-py==2.5.1
40
+ frozenlist==1.8.0
41
+ fsspec==2026.3.0
42
+ ftfy==6.2.0
43
+ gitdb==4.0.12
44
+ GitPython==3.1.46
45
+ gmpy2 @ file:///home/conda/feedstock_root/build_artifacts/gmpy2_1773244929835/work
46
+ grpcio==1.80.0
47
+ gym==0.26.2
48
+ gym-notices==0.1.0
49
+ gymnasium==1.0.0a2
50
+ h2 @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_h2_1756364871/work
51
+ h5py @ file:///home/conda/feedstock_root/build_artifacts/h5py_1774712049671/work
52
+ hf-xet==1.4.2
53
+ hpack @ file:///home/conda/feedstock_root/build_artifacts/hpack_1737618293087/work
54
+ huggingface_hub==0.36.2
55
+ hydra-core==1.3.2
56
+ hyperframe @ file:///home/conda/feedstock_root/build_artifacts/hyperframe_1737618333194/work
57
+ idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1760286409563/work
58
+ imageio @ file:///home/conda/feedstock_root/build_artifacts/imageio_1738273805233/work
59
+ imageio-ffmpeg==0.6.0
60
+ importlib_metadata==9.0.0
61
+ iniconfig==2.3.0
62
+ ipython==8.39.0
63
+ ipywidgets==8.1.8
64
+ itsdangerous==2.2.0
65
+ jedi==0.19.2
66
+ Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_jinja2_1764517220/work
67
+ joblib==1.5.3
68
+ jsonschema==4.26.0
69
+ jsonschema-specifications==2025.9.1
70
+ jupyter_core==5.9.1
71
+ jupyterlab_widgets==3.0.16
72
+ kiwisolver @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_kiwisolver_1773067043/work
73
+ Markdown==3.10.2
74
+ markdown-it-py==4.0.0
75
+ MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1772444934960/work
76
+ matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-suite_1715976200404/work
77
+ matplotlib-inline==0.2.1
78
+ mdurl==0.1.2
79
+ moviepy==2.2.1
80
+ mpmath @ file:///home/conda/feedstock_root/build_artifacts/mpmath_1773661943568/work
81
+ multidict==6.7.1
82
+ munkres==1.1.4
83
+ narwhals==2.18.1
84
+ natsort==8.4.0
85
+ nbformat==5.10.4
86
+ nest-asyncio==1.6.0
87
+ networkx @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_networkx_1731521053/work
88
+ numpy==1.26.4
89
+ omegaconf==2.3.0
90
+ open3d==0.19.0
91
+ openai==0.28.1
92
+ opencv-python==4.10.0.84
93
+ packaging @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_packaging_1769093650/work
94
+ pandas @ file:///home/conda/feedstock_root/build_artifacts/pandas_1744430447393/work
95
+ parso==0.8.6
96
+ pathtools==0.1.2
97
+ perceiver-pytorch==0.8.8
98
+ pexpect==4.9.0
99
+ pillow==12.1.1
100
+ platformdirs==4.9.4
101
+ plotly==6.6.0
102
+ pluggy==1.6.0
103
+ ply @ file:///home/conda/feedstock_root/build_artifacts/ply_1733239724146/work
104
+ poetry-core==2.3.2
105
+ proglog==0.1.12
106
+ prompt_toolkit==3.0.52
107
+ propcache==0.4.1
108
+ protobuf==4.25.9
109
+ psutil @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_psutil_1769678154/work
110
+ ptyprocess==0.7.0
111
+ pure_eval==0.2.3
112
+ py-spy==0.4.1
113
+ pycparser @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pycparser_1733195786/work
114
+ pyglet==2.1.13
115
+ Pygments==2.20.0
116
+ PyOpenGL==3.1.0
117
+ pyparsing @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pyparsing_1769003998/work
118
+ PyQt5==5.15.11
119
+ PyQt5_sip==12.17.0
120
+ pyquaternion==0.9.9
121
+ pyrender==0.1.45
122
+ -e git+https://github.com/markusgrotz/PyRep.git@b8bd1d7a3182adcd570d001649c0849047ebf197#egg=PyRep
123
+ PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1733217236728/work
124
+ pytest==9.0.2
125
+ python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_python-dateutil_1751104122/work
126
+ python-dotenv==1.2.2
127
+ pytorch-lamb==1.0.0
128
+ pytz @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pytz_1773679724/work
129
+ PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1770223234623/work
130
+ referencing==0.37.0
131
+ regex==2024.5.15
132
+ requests @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_requests_1774894783/work
133
+ retrying==1.4.2
134
+ # Editable install with no version control (reveal-vla-bimanual==0.1.0)
135
+ -e /workspace/reveal_vla_bimanual
136
+ rich==13.9.4
137
+ rich-click==1.8.9
138
+ -e git+https://github.com/markusgrotz/RLBench.git@8af748c51287989294e00c9c670e3330a0e35ed5#egg=rlbench
139
+ rpds-py==0.30.0
140
+ safetensors==0.4.3
141
+ scikit-learn==1.7.2
142
+ scipy @ file:///home/conda/feedstock_root/build_artifacts/scipy-split_1716470219380/work/dist/scipy-1.13.1-cp310-cp310-linux_x86_64.whl#sha256=a4ff22b6dc27b61196be51695f53f9b0676e7c1bc564872b51fc3c41b79ae80b
143
+ segment-anything==1.0
144
+ sentry-sdk==2.57.0
145
+ setproctitle==1.3.7
146
+ sip @ file:///home/conda/feedstock_root/build_artifacts/sip_1759437834046/work
147
+ six @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_six_1753199211/work
148
+ smmap==5.0.3
149
+ stack-data==0.6.3
150
+ sympy @ file:///home/conda/feedstock_root/build_artifacts/sympy_1771952240620/work
151
+ tensorboard==2.16.2
152
+ tensorboard-data-server==0.7.2
153
+ tensorboardX==2.6.4
154
+ termcolor==3.3.0
155
+ threadpoolctl==3.6.0
156
+ timeout-decorator==0.5.0
157
+ timm==1.0.26
158
+ tokenizers==0.19.1
159
+ toml @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_toml_1764486833/work
160
+ tomli @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_tomli_1774492402/work
161
+ torch==2.3.1
162
+ torchaudio==2.3.1
163
+ torchvision==0.18.1
164
+ tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1774357896577/work
165
+ tqdm @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_tqdm_1770153424/work
166
+ traitlets==5.14.3
167
+ transformers==4.41.2
168
+ transforms3d==0.4.1
169
+ trimesh @ file:///home/conda/feedstock_root/build_artifacts/trimesh_1774412449209/work
170
+ triton==2.3.1
171
+ typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_typing_extensions_1756220668/work
172
+ tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1765719872007/work
173
+ unicodedata2 @ file:///home/conda/feedstock_root/build_artifacts/unicodedata2_1770908960326/work
174
+ urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1767817748113/work
175
+ wandb==0.14.0
176
+ wcwidth==0.2.14
177
+ Werkzeug==3.1.7
178
+ widgetsnbextension==4.0.15
179
+ yarl==1.23.0
180
+ -e git+https://github.com/markusgrotz/YARR.git@6822ff78602c77878b27d4cfe759ce029c67bffb#egg=yarr
181
+ zipp==3.23.0
code/VLAarchtests4_root/setup/public_benchmark/uname.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Linux 7bf60ec67db4 6.8.0-88-generic #89-Ubuntu SMP PREEMPT_DYNAMIC Sat Oct 11 01:02:46 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode12/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ea33aa5ffad718904206f80ce4b80a46d0d50d8242b7b7022e93125d22550b5
3
+ size 568456
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode15/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30f868aeb7ed7dfc523de54a5f779b147f148eb383526a370b251f3b0b2b3d61
3
+ size 578384
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode18/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1e51d36069f06804aa10d2cd2d67e1471bcf229f100b32e9e93bd4ba53e171
3
+ size 642658
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode20/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d7b1e987961292635111fa2bc9f318575a851087120bcd65451d055999eeea8
3
+ size 625748
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode27/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c8ee3826de9f18c91cc44e7e25c017acd647d6c23584ef4253ef63224b9d4ce
3
+ size 586435
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode34/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59920a1d954892e419f82ace03bd9a75d2d5597ed75e8cb112d44e26fbb6b0f7
3
+ size 605954
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode4/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9683a54ffe5bf2660a91d5ea25ac96d3733cc10f1e5cd6f573869c66f9d00327
3
+ size 592262
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode43/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5eafc24e297c12ed8e53dbb780a6c848b92780ba4ae14eb93a91076979e21c8
3
+ size 555743
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode49/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:297e0520f3a8862ff3e49c3eb3d7d05356a4e23f8ab2a95090737d21cbe5ec81
3
+ size 582075
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode50/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4e903c529a6c62b374c4e00d25ab17d5f7ee2d33253e5ff8b4fed1350d2c041
3
+ size 596780
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode57/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f903fb5aac5bde8093b5e90f2d321c3128c30c19c21aaf4ad9cad76340d27717
3
+ size 601851
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode62/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85a002f02f2ac7d4792db29a5e4e0dc23e1d29763b37b91776f1d69698e69539
3
+ size 563917
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode65/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55a40ffa02b4b2700ffb9cf91ffaea97c0f2949dc7f50e00f38c257d0002737a
3
+ size 564743
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode68/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c7e5a1a4859e49fd0c20de588b290c9b14f26565b510dbbe7d2a25054576d2a
3
+ size 569596
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode71/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cf9ff624965a496b71aa6ab4ef885d9835b25ce03c47b80d4e9813845d62a1a
3
+ size 578764
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode76/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a89f9eeb584881cb5ac320db945ac62ff9176265b127e8ef4a10fc1d81f18f94
3
+ size 627864
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode81/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aad865c66bf34b71e47ebe37b4f804ec8b76dde37c5083107b23a13b52d8bbe1
3
+ size 578663
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode86/low_dim_obs.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff4e620c0c6547ebbae4420b6924b4e0e4e1f54e461443ea18d235637d20aa68
3
+ size 623542