Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +212 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/.gitignore +2 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/CACHEDIR.TAG +4 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/README.md +8 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/v/cache/lastfailed +1 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/v/cache/nodeids +11 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/artifacts/generated_configs/public_benchmark_package_v1.json +202 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md +114 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md +87 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/public_benchmark_package.md +102 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_hybrid_public_benchmark_smoke.py +142 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py +887 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/losses.py +855 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/trainer.py +184 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_summary.py +91 -0
- code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_tracks.py +63 -0
- code/VLAarchtests4_root/MODEL_AND_ARTIFACT_INDEX.md +53 -0
- code/VLAarchtests4_root/PUBLIC_BENCHMARK_RESULTS.md +217 -0
- code/VLAarchtests4_root/README.md +407 -0
- code/VLAarchtests4_root/docs/maniskill_pickclutter_correction_log_2026-04-01.md +187 -0
- code/VLAarchtests4_root/docs/minimum_sign_of_life_maniskill_pickclutter_run_2026-04-01.md +134 -0
- code/VLAarchtests4_root/docs/public_benchmark_progress_2026-04-01.md +91 -0
- code/VLAarchtests4_root/docs/public_bridge_smoke_run_log_2026-04-01.md +116 -0
- code/VLAarchtests4_root/setup/public_benchmark/ENVIRONMENT.md +68 -0
- code/VLAarchtests4_root/setup/public_benchmark/env_vars.sh +10 -0
- code/VLAarchtests4_root/setup/public_benchmark/gpu_short.txt +1 -0
- code/VLAarchtests4_root/setup/public_benchmark/hf_env.txt +35 -0
- code/VLAarchtests4_root/setup/public_benchmark/nvidia_smi.txt +21 -0
- code/VLAarchtests4_root/setup/public_benchmark/pip_freeze_python311.txt +209 -0
- code/VLAarchtests4_root/setup/public_benchmark/python_version.txt +1 -0
- code/VLAarchtests4_root/setup/public_benchmark/rlbench_env_pip_freeze.txt +181 -0
- code/VLAarchtests4_root/setup/public_benchmark/uname.txt +1 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode12/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode15/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode18/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode20/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode27/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode34/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode4/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode43/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode49/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode50/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode57/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode62/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode65/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode68/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode71/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode76/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode81/low_dim_obs.pkl +3 -0
- data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode86/low_dim_obs.pkl +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,215 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
third_party/AnyBimanual/agents/rvt/rvt/libs/point-renderer/demo.png filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Baxter.ttm filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
third_party/AnyBimanual/third_party/PyRep/pyrep/backend/_sim_cffi.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
third_party/AnyBimanual/third_party/PyRep/examples/scene_youbot_navigation.ttt filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
third_party/AnyBimanual/third_party/PyRep/examples/scene_turtlebot_navigation.ttt filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
third_party/AnyBimanual/third_party/PyRep/examples/scene_reinforcement_learning_env.ttt filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
third_party/AnyBimanual/third_party/PyRep/examples/scene_baxter_pick_and_pass.ttt filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
third_party/AnyBimanual/third_party/PyRep/examples/scene_locobot_stack_cube.ttt filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
third_party/AnyBimanual/third_party/PyRep/examples/scene_panda_reach_target.ttt filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
third_party/AnyBimanual/third_party/RLBench/readme_files/task_grid.png filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
third_party/AnyBimanual/third_party/PyRep/tutorials/images/kinematics_group.png filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
third_party/AnyBimanual/third_party/PyRep/tutorials/images/collision_collections.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
third_party/AnyBimanual/third_party/PyRep/tests/assets/test_scene_robots.ttt filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
third_party/AnyBimanual/third_party/PyRep/tests/assets/test_scene.ttt filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
third_party/AnyBimanual/third_party/PyRep/tests/assets/test_scene_mobiles.ttt filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
third_party/AnyBimanual/third_party/PyRep/tests/assets/test_scene_mobiles_with_arms.ttt filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
third_party/AnyBimanual/third_party/PyRep/tests/assets/cracker_box/texture_map.png filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/mobiles/LineTracer.ttm filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/mobiles/youBot.ttm filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/Robotiq85.ttm filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/mobiles/turtlebot.ttm filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/PandaGripper.ttm filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/mobiles/LoCoBot.ttm filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/MicoHand.ttm filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/JacoHand.ttm filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/end_effectors/BaxterGripper.ttm filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/UR5.ttm filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/UR3.ttm filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/UR10.ttm filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/XArm7.ttm filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Sawyer.ttm filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Panda.ttm filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Mico.ttm filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/LBR_iiwa_14_R820.ttm filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/LBR_iiwa_7_R800.ttm filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Jaco.ttm filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
third_party/AnyBimanual/third_party/PyRep/robot_ttms/arms/Dobot.ttm filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
third_party/3d_flowmatch_actor/fig/peract_hiveformer.jpg filter=lfs diff=lfs merge=lfs -text
|
| 74 |
+
third_party/3d_flowmatch_actor/fig/peract2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 75 |
+
third_party/3d_flowmatch_actor/fig/diagram_gif.gif filter=lfs diff=lfs merge=lfs -text
|
| 76 |
+
third_party/AnyBimanual/third_party/PyRep/build/temp.linux-x86_64-cpython-311/build/pyrep/backend/pyrep/backend/_sim_cffi.o filter=lfs diff=lfs merge=lfs -text
|
| 77 |
+
third_party/AnyBimanual/third_party/PyRep/build/lib.linux-x86_64-cpython-311/pyrep/backend/_sim_cffi.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
|
| 78 |
+
third_party/AnyBimanual/docs/logo.png filter=lfs diff=lfs merge=lfs -text
|
| 79 |
+
third_party/AnyBimanual/docs/pipeline.png filter=lfs diff=lfs merge=lfs -text
|
| 80 |
+
third_party/AnyBimanual/anybimanual_real_supply/data/pick_in_two_keyframe/episode0/front_rgb/1.png filter=lfs diff=lfs merge=lfs -text
|
| 81 |
+
third_party/AnyBimanual/anybimanual_real_supply/data/pick_in_two_keyframe/episode0/front_rgb/2.png filter=lfs diff=lfs merge=lfs -text
|
| 82 |
+
third_party/AnyBimanual/anybimanual_real_supply/data/pick_in_two_keyframe/episode0/front_rgb/0.png filter=lfs diff=lfs merge=lfs -text
|
| 83 |
+
third_party/DexGarmentLab/Data/Hang_Coat_Validation_HALO/final_state_pic/img_1.png filter=lfs diff=lfs merge=lfs -text
|
| 84 |
+
third_party/DexGarmentLab/Data/Hang_Coat_Validation_HALO/final_state_pic/img_0.png filter=lfs diff=lfs merge=lfs -text
|
| 85 |
+
third_party/AnyBimanual/third_party/pytorch3d/tests/pulsar/reference/examples_TestRenderer_test_smallopt.png filter=lfs diff=lfs merge=lfs -text
|
| 86 |
+
third_party/AnyBimanual/third_party/pytorch3d/tests/pulsar/reference/examples_TestRenderer_test_cam.png filter=lfs diff=lfs merge=lfs -text
|
| 87 |
+
third_party/AnyBimanual/third_party/pytorch3d/tests/data/test_cow_image_rectangle_MeshRasterizerOpenGL.png filter=lfs diff=lfs merge=lfs -text
|
| 88 |
+
third_party/AnyBimanual/third_party/pytorch3d/tests/data/cow.glb filter=lfs diff=lfs merge=lfs -text
|
| 89 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/transforms_overview.jpg filter=lfs diff=lfs merge=lfs -text
|
| 90 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/world_camera_image.jpg filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/p3d_vs_softras.png filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/p3d_naive_vs_coarse.png filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/meshrcnn.png filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/iou3d.gif filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/batch_intro.png filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/batch_modes.gif filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
third_party/AnyBimanual/third_party/pytorch3d/docs/notes/assets/architecture_renderer.jpg filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/shapenet_render.png filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/render_textured_mesh.gif filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/nerf_project_logo.gif filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/fit_nerf.gif filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/fit_textured_volume.gif filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/implicitron_config.gif filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/densepose_render.png filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/dolphin_deform.gif filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/cow_deform.gif filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/camera_position_teapot.gif filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
third_party/AnyBimanual/third_party/pytorch3d/.github/bundle_adjust.gif filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/waypoints_added.png filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/w0_ext_string.png filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/translate_cuboid_labeled.png filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/task_design_empty_labeled.png filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/task_design_empty.png filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/spawn_boundary.png filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/set_dummy_orient_labeled.png filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/scaling_factors_labeled.png filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/save_builder.gif filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/slide_block_to_target.gif filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/rotate_wall.png filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/pos_relto_parent.png filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/make_grey.png filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/large_container_scene.png filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/just_boxes.png filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/grouping.gif filter=lfs diff=lfs merge=lfs -text
|
| 125 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/invert_vis_layer.gif filter=lfs diff=lfs merge=lfs -text
|
| 126 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/cuboid_plane.png filter=lfs diff=lfs merge=lfs -text
|
| 127 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/empty_container.gif filter=lfs diff=lfs merge=lfs -text
|
| 128 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/container_wall.png filter=lfs diff=lfs merge=lfs -text
|
| 129 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/change_proxsense_size_labeled.png filter=lfs diff=lfs merge=lfs -text
|
| 130 |
+
third_party/AnyBimanual/third_party/RLBench/tutorials/tutorial_images/1base_1wall.png filter=lfs diff=lfs merge=lfs -text
|
| 131 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_design_bimanual.ttt filter=lfs diff=lfs merge=lfs -text
|
| 132 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_design.ttt filter=lfs diff=lfs merge=lfs -text
|
| 133 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/weighing_scales.ttm filter=lfs diff=lfs merge=lfs -text
|
| 134 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/tv_on.ttm filter=lfs diff=lfs merge=lfs -text
|
| 135 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/water_plants.ttm filter=lfs diff=lfs merge=lfs -text
|
| 136 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/unplug_charger.ttm filter=lfs diff=lfs merge=lfs -text
|
| 137 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/turn_oven_on.ttm filter=lfs diff=lfs merge=lfs -text
|
| 138 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/turn_tap.ttm filter=lfs diff=lfs merge=lfs -text
|
| 139 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_usb_out_of_computer.ttm filter=lfs diff=lfs merge=lfs -text
|
| 140 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_tray_out_of_oven.ttm filter=lfs diff=lfs merge=lfs -text
|
| 141 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_umbrella_out_of_umbrella_stand.ttm filter=lfs diff=lfs merge=lfs -text
|
| 142 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_lid_off_saucepan.ttm filter=lfs diff=lfs merge=lfs -text
|
| 143 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_shoes_out_of_box.ttm filter=lfs diff=lfs merge=lfs -text
|
| 144 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_money_out_safe.ttm filter=lfs diff=lfs merge=lfs -text
|
| 145 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_off_weighing_scales.ttm filter=lfs diff=lfs merge=lfs -text
|
| 146 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/sweep_to_dustpan.ttm filter=lfs diff=lfs merge=lfs -text
|
| 147 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/sweep_to_dustpan_of_size.ttm filter=lfs diff=lfs merge=lfs -text
|
| 148 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_frame_off_hanger.ttm filter=lfs diff=lfs merge=lfs -text
|
| 149 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/straighten_rope.ttm filter=lfs diff=lfs merge=lfs -text
|
| 150 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/stack_wine.ttm filter=lfs diff=lfs merge=lfs -text
|
| 151 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/stack_cups.ttm filter=lfs diff=lfs merge=lfs -text
|
| 152 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/stack_chairs.ttm filter=lfs diff=lfs merge=lfs -text
|
| 153 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/take_cup_out_from_cabinet.ttm filter=lfs diff=lfs merge=lfs -text
|
| 154 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/solve_puzzle.ttm filter=lfs diff=lfs merge=lfs -text
|
| 155 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/setup_chess.ttm filter=lfs diff=lfs merge=lfs -text
|
| 156 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/slide_cabinet_open_and_place_cups.ttm filter=lfs diff=lfs merge=lfs -text
|
| 157 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/setup_checkers.ttm filter=lfs diff=lfs merge=lfs -text
|
| 158 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/slide_cabinet_open.ttm filter=lfs diff=lfs merge=lfs -text
|
| 159 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/set_the_table.ttm filter=lfs diff=lfs merge=lfs -text
|
| 160 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/set_clock_to_time.ttm filter=lfs diff=lfs merge=lfs -text
|
| 161 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/scoop_with_spatula.ttm filter=lfs diff=lfs merge=lfs -text
|
| 162 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/remove_cups.ttm filter=lfs diff=lfs merge=lfs -text
|
| 163 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_tray_in_oven.ttm filter=lfs diff=lfs merge=lfs -text
|
| 164 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_umbrella_in_umbrella_stand.ttm filter=lfs diff=lfs merge=lfs -text
|
| 165 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_rubbish_in_color_bin.ttm filter=lfs diff=lfs merge=lfs -text
|
| 166 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_rubbish_in_bin.ttm filter=lfs diff=lfs merge=lfs -text
|
| 167 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_shoes_in_box.ttm filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_money_in_safe.ttm filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_knife_on_chopping_board.ttm filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_knife_in_knife_block.ttm filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_groceries_in_cupboard.ttm filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_bottle_in_fridge.ttm filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_all_groceries_in_cupboard.ttm filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/pour_from_cup_to_cup.ttm filter=lfs diff=lfs merge=lfs -text
|
| 175 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/press_switch.ttm filter=lfs diff=lfs merge=lfs -text
|
| 176 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_books_at_shelf_location.ttm filter=lfs diff=lfs merge=lfs -text
|
| 177 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/put_books_on_bookshelf.ttm filter=lfs diff=lfs merge=lfs -text
|
| 178 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/plug_charger_in_power_supply.ttm filter=lfs diff=lfs merge=lfs -text
|
| 179 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/play_jenga.ttm filter=lfs diff=lfs merge=lfs -text
|
| 180 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/place_wine_at_rack_location.ttm filter=lfs diff=lfs merge=lfs -text
|
| 181 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/place_shape_in_shape_sorter.ttm filter=lfs diff=lfs merge=lfs -text
|
| 182 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/place_hanger_on_rack.ttm filter=lfs diff=lfs merge=lfs -text
|
| 183 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/pick_and_lift_small.ttm filter=lfs diff=lfs merge=lfs -text
|
| 184 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/pick_up_cup.ttm filter=lfs diff=lfs merge=lfs -text
|
| 185 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/pick_and_lift.ttm filter=lfs diff=lfs merge=lfs -text
|
| 186 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/place_cups.ttm filter=lfs diff=lfs merge=lfs -text
|
| 187 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/phone_on_base.ttm filter=lfs diff=lfs merge=lfs -text
|
| 188 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_wine_bottle.ttm filter=lfs diff=lfs merge=lfs -text
|
| 189 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_washing_machine.ttm filter=lfs diff=lfs merge=lfs -text
|
| 190 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_oven.ttm filter=lfs diff=lfs merge=lfs -text
|
| 191 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_microwave.ttm filter=lfs diff=lfs merge=lfs -text
|
| 192 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_grill.ttm filter=lfs diff=lfs merge=lfs -text
|
| 193 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_fridge.ttm filter=lfs diff=lfs merge=lfs -text
|
| 194 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_window.ttm filter=lfs diff=lfs merge=lfs -text
|
| 195 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_jar.ttm filter=lfs diff=lfs merge=lfs -text
|
| 196 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_door.ttm filter=lfs diff=lfs merge=lfs -text
|
| 197 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/move_hanger.ttm filter=lfs diff=lfs merge=lfs -text
|
| 198 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/open_box.ttm filter=lfs diff=lfs merge=lfs -text
|
| 199 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/light_bulb_out.ttm filter=lfs diff=lfs merge=lfs -text
|
| 200 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/light_bulb_in.ttm filter=lfs diff=lfs merge=lfs -text
|
| 201 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/meat_on_grill.ttm filter=lfs diff=lfs merge=lfs -text
|
| 202 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/meat_off_grill.ttm filter=lfs diff=lfs merge=lfs -text
|
| 203 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/lamp_off.ttm filter=lfs diff=lfs merge=lfs -text
|
| 204 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/lamp_on.ttm filter=lfs diff=lfs merge=lfs -text
|
| 205 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/insert_usb_in_computer.ttm filter=lfs diff=lfs merge=lfs -text
|
| 206 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/hit_ball_with_queue.ttm filter=lfs diff=lfs merge=lfs -text
|
| 207 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/hang_frame_on_hanger.ttm filter=lfs diff=lfs merge=lfs -text
|
| 208 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/hockey.ttm filter=lfs diff=lfs merge=lfs -text
|
| 209 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/empty_dishwasher.ttm filter=lfs diff=lfs merge=lfs -text
|
| 210 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/get_ice_from_fridge.ttm filter=lfs diff=lfs merge=lfs -text
|
| 211 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/cut_vegetables.ttm filter=lfs diff=lfs merge=lfs -text
|
| 212 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_take_tray_out_of_oven.ttm filter=lfs diff=lfs merge=lfs -text
|
| 213 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_put_bottle_in_fridge.ttm filter=lfs diff=lfs merge=lfs -text
|
| 214 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_take_shoes_out_of_box.ttm filter=lfs diff=lfs merge=lfs -text
|
| 215 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_lift_stick.ttm filter=lfs diff=lfs merge=lfs -text
|
| 216 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_microwave.ttm filter=lfs diff=lfs merge=lfs -text
|
| 217 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/coordinated_close_jar.ttm filter=lfs diff=lfs merge=lfs -text
|
| 218 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_laptop_lid.ttm filter=lfs diff=lfs merge=lfs -text
|
| 219 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_jar.ttm filter=lfs diff=lfs merge=lfs -text
|
| 220 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_grill.ttm filter=lfs diff=lfs merge=lfs -text
|
| 221 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_fridge.ttm filter=lfs diff=lfs merge=lfs -text
|
| 222 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_door.ttm filter=lfs diff=lfs merge=lfs -text
|
| 223 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/close_box.ttm filter=lfs diff=lfs merge=lfs -text
|
| 224 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/change_clock.ttm filter=lfs diff=lfs merge=lfs -text
|
| 225 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/change_channel.ttm filter=lfs diff=lfs merge=lfs -text
|
| 226 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_sweep_to_dustpan.ttm filter=lfs diff=lfs merge=lfs -text
|
| 227 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_straighten_rope.ttm filter=lfs diff=lfs merge=lfs -text
|
| 228 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_set_the_table.ttm filter=lfs diff=lfs merge=lfs -text
|
| 229 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_pick_plate.ttm filter=lfs diff=lfs merge=lfs -text
|
| 230 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_pick_laptop.ttm filter=lfs diff=lfs merge=lfs -text
|
| 231 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/beat_the_buzz.ttm filter=lfs diff=lfs merge=lfs -text
|
| 232 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/basketball_in_hoop.ttm filter=lfs diff=lfs merge=lfs -text
|
| 233 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_take_tray_out_of_oven.ttm filter=lfs diff=lfs merge=lfs -text
|
| 234 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/ur5.ttm filter=lfs diff=lfs merge=lfs -text
|
| 235 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/sawyer.ttm filter=lfs diff=lfs merge=lfs -text
|
| 236 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/task_ttms/bimanual_take_shoes_out_of_box.ttm filter=lfs diff=lfs merge=lfs -text
|
| 237 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/panda.ttm filter=lfs diff=lfs merge=lfs -text
|
| 238 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/mico.ttm filter=lfs diff=lfs merge=lfs -text
|
| 239 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/jaco.ttm filter=lfs diff=lfs merge=lfs -text
|
| 240 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/robot_ttms/dual_panda.ttm filter=lfs diff=lfs merge=lfs -text
|
| 241 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/assets/knife_block.ttm filter=lfs diff=lfs merge=lfs -text
|
| 242 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/assets/frying_pan.ttm filter=lfs diff=lfs merge=lfs -text
|
| 243 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/assets/dishwasher.ttm filter=lfs diff=lfs merge=lfs -text
|
| 244 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/assets/chopping_board.ttm filter=lfs diff=lfs merge=lfs -text
|
| 245 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/assets/door.ttm filter=lfs diff=lfs merge=lfs -text
|
| 246 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/assets/carrot.ttm filter=lfs diff=lfs merge=lfs -text
|
| 247 |
+
third_party/AnyBimanual/third_party/RLBench/rlbench/assets/banana.ttm filter=lfs diff=lfs merge=lfs -text
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/.gitignore
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Created by pytest automatically.
|
| 2 |
+
*
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/CACHEDIR.TAG
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Signature: 8a477f597d28d172789f06886806bc55
|
| 2 |
+
# This file is a cache directory tag created by pytest.
|
| 3 |
+
# For information about cache directory tags, see:
|
| 4 |
+
# https://bford.info/cachedir/spec.html
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/README.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# pytest cache directory #
|
| 2 |
+
|
| 3 |
+
This directory contains data from the pytest's cache plugin,
|
| 4 |
+
which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
|
| 5 |
+
|
| 6 |
+
**Do not** commit this to version control.
|
| 7 |
+
|
| 8 |
+
See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/v/cache/lastfailed
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{}
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/.pytest_cache/v/cache/nodeids
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
"tests/test_eval_toggle_paths_work.py::test_eval_toggle_paths_work",
|
| 3 |
+
"tests/test_general_eval_protocol_is_identical.py::test_general_eval_protocol_is_identical_across_modes",
|
| 4 |
+
"tests/test_public_benchmark_package_summary.py::test_public_benchmark_package_detects_training_mismatch",
|
| 5 |
+
"tests/test_public_benchmark_package_summary.py::test_public_benchmark_package_summary_passes_with_clear_gain",
|
| 6 |
+
"tests/test_public_benchmark_package_tracks.py::test_public_anchor_protocol_identity_is_mode_invariant",
|
| 7 |
+
"tests/test_public_benchmark_package_tracks.py::test_public_benchmark_package_contains_expected_tracks",
|
| 8 |
+
"tests/test_public_benchmark_package_tracks.py::test_public_target_protocol_identity_is_mode_invariant",
|
| 9 |
+
"tests/test_public_benchmark_package_tracks.py::test_public_track_roles_are_partitioned",
|
| 10 |
+
"tests/test_public_benchmark_package_tracks.py::test_training_fairness_signature_matches_for_trunk_and_adapter"
|
| 11 |
+
]
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/artifacts/generated_configs/public_benchmark_package_v1.json
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"anchor_eval_modes": [
|
| 3 |
+
"trunk_only",
|
| 4 |
+
"adapter_noop",
|
| 5 |
+
"adapter_active"
|
| 6 |
+
],
|
| 7 |
+
"anchor_track_ids": [],
|
| 8 |
+
"defaults": {
|
| 9 |
+
"anchor_episodes": 25,
|
| 10 |
+
"resolution": 256,
|
| 11 |
+
"target_test_episodes": 100,
|
| 12 |
+
"target_train_demos": 64,
|
| 13 |
+
"target_val_demos": 16
|
| 14 |
+
},
|
| 15 |
+
"package_name": "public_hybrid_bimanual_benchmark_v1",
|
| 16 |
+
"target_eval_modes": [
|
| 17 |
+
"trunk_only_ft",
|
| 18 |
+
"adapter_noop",
|
| 19 |
+
"adapter_active_ft"
|
| 20 |
+
],
|
| 21 |
+
"target_track_ids": [
|
| 22 |
+
"rlbench2_put_bottle_in_fridge",
|
| 23 |
+
"rlbench2_take_out_tray",
|
| 24 |
+
"rlbench2_take_shoes_out_of_box",
|
| 25 |
+
"rlbench2_lift_tray",
|
| 26 |
+
"rlbench2_straighten_rope",
|
| 27 |
+
"rlbench2_sweep_to_dustpan",
|
| 28 |
+
"dexgarmentlab_store_tops",
|
| 29 |
+
"dexgarmentlab_fold_tops",
|
| 30 |
+
"dexgarmentlab_hang_coat"
|
| 31 |
+
],
|
| 32 |
+
"thresholds": {
|
| 33 |
+
"anchor_tolerance": 0.02,
|
| 34 |
+
"sign_of_life_intervention_rate": 0.15,
|
| 35 |
+
"sign_of_life_non_base_selection_rate": 0.15,
|
| 36 |
+
"sign_of_life_success_gain": 0.05
|
| 37 |
+
},
|
| 38 |
+
"tracks": [
|
| 39 |
+
{
|
| 40 |
+
"action_space": "bimanual_pose_then_gripper",
|
| 41 |
+
"benchmark_task": "bimanual_put_bottle_in_fridge",
|
| 42 |
+
"default_cameras": [
|
| 43 |
+
"front",
|
| 44 |
+
"wrist_left",
|
| 45 |
+
"wrist_right"
|
| 46 |
+
],
|
| 47 |
+
"notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
|
| 48 |
+
"observation_stack": "rgbd_3cam",
|
| 49 |
+
"public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
|
| 50 |
+
"role": "target",
|
| 51 |
+
"success_metric": "success_rate",
|
| 52 |
+
"suite": "rlbench2",
|
| 53 |
+
"target_behavior": "grasp and place a bottle into a constrained refrigerator cavity",
|
| 54 |
+
"task_family": "containment_manipulation",
|
| 55 |
+
"track_id": "rlbench2_put_bottle_in_fridge"
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"action_space": "bimanual_pose_then_gripper",
|
| 59 |
+
"benchmark_task": "bimanual_take_tray_out_of_oven",
|
| 60 |
+
"default_cameras": [
|
| 61 |
+
"front",
|
| 62 |
+
"wrist_left",
|
| 63 |
+
"wrist_right"
|
| 64 |
+
],
|
| 65 |
+
"notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
|
| 66 |
+
"observation_stack": "rgbd_3cam",
|
| 67 |
+
"public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
|
| 68 |
+
"role": "target",
|
| 69 |
+
"success_metric": "success_rate",
|
| 70 |
+
"suite": "rlbench2",
|
| 71 |
+
"target_behavior": "extract a tray from an enclosed appliance with coordinated bimanual motion",
|
| 72 |
+
"task_family": "tray_extraction",
|
| 73 |
+
"track_id": "rlbench2_take_out_tray"
|
| 74 |
+
},
|
| 75 |
+
{
|
| 76 |
+
"action_space": "bimanual_pose_then_gripper",
|
| 77 |
+
"benchmark_task": "bimanual_take_shoes_out_of_box",
|
| 78 |
+
"default_cameras": [
|
| 79 |
+
"front",
|
| 80 |
+
"wrist_left",
|
| 81 |
+
"wrist_right"
|
| 82 |
+
],
|
| 83 |
+
"notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 shoe-box extraction task.",
|
| 84 |
+
"observation_stack": "rgbd_3cam",
|
| 85 |
+
"public_source": "https://bimanual.github.io/",
|
| 86 |
+
"role": "target",
|
| 87 |
+
"success_metric": "success_rate",
|
| 88 |
+
"suite": "rlbench2",
|
| 89 |
+
"target_behavior": "open a shoe box and remove both shoes with coordinated bimanual manipulation",
|
| 90 |
+
"task_family": "container_extraction",
|
| 91 |
+
"track_id": "rlbench2_take_shoes_out_of_box"
|
| 92 |
+
},
|
| 93 |
+
{
|
| 94 |
+
"action_space": "bimanual_pose_then_gripper",
|
| 95 |
+
"benchmark_task": "bimanual_lift_tray",
|
| 96 |
+
"default_cameras": [
|
| 97 |
+
"front",
|
| 98 |
+
"wrist_left",
|
| 99 |
+
"wrist_right"
|
| 100 |
+
],
|
| 101 |
+
"notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
|
| 102 |
+
"observation_stack": "rgbd_3cam",
|
| 103 |
+
"public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
|
| 104 |
+
"role": "target",
|
| 105 |
+
"success_metric": "success_rate",
|
| 106 |
+
"suite": "rlbench2",
|
| 107 |
+
"target_behavior": "coordinate both arms to lift a tray while maintaining grasp consistency",
|
| 108 |
+
"task_family": "tray_lifting",
|
| 109 |
+
"track_id": "rlbench2_lift_tray"
|
| 110 |
+
},
|
| 111 |
+
{
|
| 112 |
+
"action_space": "bimanual_pose_then_gripper",
|
| 113 |
+
"benchmark_task": "bimanual_straighten_rope",
|
| 114 |
+
"default_cameras": [
|
| 115 |
+
"front",
|
| 116 |
+
"wrist_left",
|
| 117 |
+
"wrist_right"
|
| 118 |
+
],
|
| 119 |
+
"notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
|
| 120 |
+
"observation_stack": "rgbd_3cam",
|
| 121 |
+
"public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
|
| 122 |
+
"role": "target",
|
| 123 |
+
"success_metric": "success_rate",
|
| 124 |
+
"suite": "rlbench2",
|
| 125 |
+
"target_behavior": "coordinate both arms to manipulate and straighten a rope",
|
| 126 |
+
"task_family": "deformable_linear_manipulation",
|
| 127 |
+
"track_id": "rlbench2_straighten_rope"
|
| 128 |
+
},
|
| 129 |
+
{
|
| 130 |
+
"action_space": "bimanual_pose_then_gripper",
|
| 131 |
+
"benchmark_task": "bimanual_sweep_to_dustpan",
|
| 132 |
+
"default_cameras": [
|
| 133 |
+
"front",
|
| 134 |
+
"wrist_left",
|
| 135 |
+
"wrist_right"
|
| 136 |
+
],
|
| 137 |
+
"notes": "Smoke runner verifies headless reset plus one nonzero bimanual pose-and-gripper step on the public RLBench2 task.",
|
| 138 |
+
"observation_stack": "rgbd_3cam",
|
| 139 |
+
"public_source": "https://github.com/markusgrotz/RLBench/tree/master/rlbench/bimanual_tasks",
|
| 140 |
+
"role": "target",
|
| 141 |
+
"success_metric": "success_rate",
|
| 142 |
+
"suite": "rlbench2",
|
| 143 |
+
"target_behavior": "coordinate a sweeping motion that moves debris toward a dustpan region",
|
| 144 |
+
"task_family": "tool_use_sweeping",
|
| 145 |
+
"track_id": "rlbench2_sweep_to_dustpan"
|
| 146 |
+
},
|
| 147 |
+
{
|
| 148 |
+
"action_space": "bimanual_ik_hand_state",
|
| 149 |
+
"benchmark_task": "Store Tops",
|
| 150 |
+
"default_cameras": [
|
| 151 |
+
"env_camera",
|
| 152 |
+
"garment_camera",
|
| 153 |
+
"object_camera"
|
| 154 |
+
],
|
| 155 |
+
"notes": "Smoke runner executes the shipped scripted GAM-driven policy headlessly and checks for nonzero point-cloud and action-derived quantities.",
|
| 156 |
+
"observation_stack": "rgbd_pointcloud_3cam",
|
| 157 |
+
"public_source": "https://github.com/wayrise/DexGarmentLab",
|
| 158 |
+
"role": "target",
|
| 159 |
+
"success_metric": "success_rate",
|
| 160 |
+
"suite": "dexgarmentlab",
|
| 161 |
+
"target_behavior": "lift and place a top into the target storage region",
|
| 162 |
+
"task_family": "garment_storage",
|
| 163 |
+
"track_id": "dexgarmentlab_store_tops"
|
| 164 |
+
},
|
| 165 |
+
{
|
| 166 |
+
"action_space": "bimanual_ik_hand_state",
|
| 167 |
+
"benchmark_task": "Fold Tops",
|
| 168 |
+
"default_cameras": [
|
| 169 |
+
"env_camera",
|
| 170 |
+
"garment_camera",
|
| 171 |
+
"object_camera"
|
| 172 |
+
],
|
| 173 |
+
"notes": "Smoke runner executes the shipped scripted GAM-driven policy headlessly and checks for nonzero point-cloud and action-derived quantities.",
|
| 174 |
+
"observation_stack": "rgbd_pointcloud_3cam",
|
| 175 |
+
"public_source": "https://github.com/wayrise/DexGarmentLab",
|
| 176 |
+
"role": "target",
|
| 177 |
+
"success_metric": "success_rate",
|
| 178 |
+
"suite": "dexgarmentlab",
|
| 179 |
+
"target_behavior": "execute a multi-stage fold of a top garment with bimanual dexterous control",
|
| 180 |
+
"task_family": "garment_folding",
|
| 181 |
+
"track_id": "dexgarmentlab_fold_tops"
|
| 182 |
+
},
|
| 183 |
+
{
|
| 184 |
+
"action_space": "bimanual_ik_hand_state",
|
| 185 |
+
"benchmark_task": "Hang Coat",
|
| 186 |
+
"default_cameras": [
|
| 187 |
+
"env_camera",
|
| 188 |
+
"garment_camera",
|
| 189 |
+
"object_camera"
|
| 190 |
+
],
|
| 191 |
+
"notes": "Smoke runner executes the shipped scripted GAM-driven policy headlessly and checks for nonzero point-cloud and action-derived quantities.",
|
| 192 |
+
"observation_stack": "rgbd_pointcloud_3cam",
|
| 193 |
+
"public_source": "https://github.com/wayrise/DexGarmentLab",
|
| 194 |
+
"role": "target",
|
| 195 |
+
"success_metric": "success_rate",
|
| 196 |
+
"suite": "dexgarmentlab",
|
| 197 |
+
"target_behavior": "lift and place a coat onto the hanging fixture with bimanual coordination",
|
| 198 |
+
"task_family": "garment_hanging",
|
| 199 |
+
"track_id": "dexgarmentlab_hang_coat"
|
| 200 |
+
}
|
| 201 |
+
]
|
| 202 |
+
}
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/README.md
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# reveal_vla_bimanual
|
| 2 |
+
|
| 3 |
+
Simulation-first prototype for a language-conditioned bimanual reveal-and-retrieve policy under elastic occlusion.
|
| 4 |
+
|
| 5 |
+
This repo is not a generalist VLA backbone in the RT-2 / OpenVLA / Octo sense. The current contribution is the reveal-state machinery layered on top of a frozen vision-language encoder.
|
| 6 |
+
|
| 7 |
+
This repo is structured around five top-level modules:
|
| 8 |
+
|
| 9 |
+
- `sim_rlbench/`: RLBench2 / PerAct2 wrappers, dataset hooks, camera setup, and benchmark evaluation helpers.
|
| 10 |
+
- `sim_reveal/`: reveal-proxy environments, scripted teachers, and privileged label extraction.
|
| 11 |
+
- `models/`: shared backbone wrappers, multi-view fusion, bimanual decoder, reveal-state head, world model, and planner.
|
| 12 |
+
- `train/`: trainers, losses, checkpointing, and Hydra/YAML configs.
|
| 13 |
+
- `eval/`: benchmark scripts, ablations, metrics, plots, and report generation.
|
| 14 |
+
|
| 15 |
+
Current bootstrap priorities:
|
| 16 |
+
|
| 17 |
+
1. Reproduce the RLBench2 / PerAct2 stack with a fixed 3-camera interface.
|
| 18 |
+
2. Stand up a backbone-only 3-camera policy in the same training/eval harness.
|
| 19 |
+
3. Add reveal-state supervision and short-horizon planning for synthetic reveal proxies.
|
| 20 |
+
|
| 21 |
+
## Public benchmark package
|
| 22 |
+
|
| 23 |
+
The repo now includes a concrete public-benchmark package definition for the next-stage fair comparison:
|
| 24 |
+
|
| 25 |
+
- `eval/public_benchmark_package.py`
|
| 26 |
+
- track registry for bag, dense occluded retrieval, cloth retrieval, and the generic anchor
|
| 27 |
+
- same-protocol signatures across `trunk_only`, `adapter_noop`, and `adapter_active`
|
| 28 |
+
- same-data / same-init fairness signatures for `trunk_only_ft` vs `adapter_active_ft`
|
| 29 |
+
|
| 30 |
+
- `eval/run_public_benchmark_package.py`
|
| 31 |
+
- validates normalized result files from multiple public suites
|
| 32 |
+
- checks protocol identity and training fairness
|
| 33 |
+
- aggregates per-track gains, sign-of-life diagnostics, and anchor regressions
|
| 34 |
+
|
| 35 |
+
Write the default manifest to `~/workspace` with:
|
| 36 |
+
|
| 37 |
+
```bash
|
| 38 |
+
python -m eval.run_public_benchmark_package \
|
| 39 |
+
--write-default-manifest ~/workspace/public_benchmark_package_v1.json
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
Summarize normalized result files with:
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
python -m eval.run_public_benchmark_package \
|
| 46 |
+
--result /abs/path/result_a.json \
|
| 47 |
+
--result /abs/path/result_b.json \
|
| 48 |
+
--output-dir ~/workspace/reports/public_benchmark_package_v1
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
Upstream dependencies are kept in `/workspace/third_party` and pinned in `docs/upstream_pins.md`.
|
| 52 |
+
|
| 53 |
+
## RLBench env A
|
| 54 |
+
|
| 55 |
+
The RLBench / PerAct2 stack is pinned to Python 3.10 and lives in `/workspace/envs/rlbench`.
|
| 56 |
+
|
| 57 |
+
Bring it up with:
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
/workspace/reveal_vla_bimanual/scripts/setup_env_a_rlbench.sh
|
| 61 |
+
/workspace/reveal_vla_bimanual/scripts/setup_rlbench_headless_x.sh
|
| 62 |
+
/workspace/reveal_vla_bimanual/scripts/start_rlbench_x.sh
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
Verify GPU GL on the headless display:
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
DISPLAY=:99 glxinfo -B
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
Run the RLBench launch/reset/step smoke test:
|
| 72 |
+
|
| 73 |
+
```bash
|
| 74 |
+
env \
|
| 75 |
+
DISPLAY=:99 \
|
| 76 |
+
XDG_RUNTIME_DIR=/tmp/runtime-root \
|
| 77 |
+
COPPELIASIM_ROOT=/workspace/assets/coppeliasim_v4_1_0 \
|
| 78 |
+
LD_LIBRARY_PATH=/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu:/workspace/system_shims/nvidia$(nvidia-smi --query-gpu=driver_version --format=csv,noheader | head -n1 | cut -d. -f1)/usr/lib/x86_64-linux-gnu/nvidia:/workspace/assets/coppeliasim_v4_1_0 \
|
| 79 |
+
QT_QPA_PLATFORM_PLUGIN_PATH=/workspace/assets/coppeliasim_v4_1_0 \
|
| 80 |
+
/workspace/.tools/micromamba/bin/micromamba run \
|
| 81 |
+
-r /workspace/.micromamba \
|
| 82 |
+
-p /workspace/envs/rlbench \
|
| 83 |
+
python -m sim_rlbench.launch_smoke --headless
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
The working benchmark interface is fixed to three cameras only:
|
| 87 |
+
|
| 88 |
+
- `front`
|
| 89 |
+
- `wrist_left`
|
| 90 |
+
- `wrist_right`
|
| 91 |
+
|
| 92 |
+
The smoke test covers launch, bimanual task reset, canonical observation extraction, and one bimanual action step in `headless=True`, which is the same mode used by the upstream PerAct2-style training stack.
|
| 93 |
+
|
| 94 |
+
Generate the PerAct2-compatible train command for the fixed 3-camera interface with:
|
| 95 |
+
|
| 96 |
+
```bash
|
| 97 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 98 |
+
python -m sim_rlbench.smoke_test --print-train-command
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
Download the published PerAct2 demos into `/workspace/data/rlbench2` with checksum verification:
|
| 102 |
+
|
| 103 |
+
```bash
|
| 104 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 105 |
+
python -m sim_rlbench.dataset_download --resolution 256 --splits train
|
| 106 |
+
```
|
| 107 |
+
|
| 108 |
+
If you want the archives unpacked directly into the demo root expected by RLBench, add `--extract`:
|
| 109 |
+
|
| 110 |
+
```bash
|
| 111 |
+
apt-get install -y squashfs-tools
|
| 112 |
+
micromamba run -r /workspace/.micromamba -p /workspace/envs/rlbench \
|
| 113 |
+
python -m sim_rlbench.dataset_download --resolution 256 --splits train --extract
|
| 114 |
+
```
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/adapter_stack.md
ADDED
|
@@ -0,0 +1,87 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Adapter Stack
|
| 2 |
+
|
| 3 |
+
This repo now contains a no-op-safe `trunk + adapter` path alongside the legacy monolithic policies.
|
| 4 |
+
|
| 5 |
+
## Main classes
|
| 6 |
+
|
| 7 |
+
- `models/policy.py`
|
| 8 |
+
- `FoundationTrunkPolicy`
|
| 9 |
+
- `ElasticOcclusionAdapter`
|
| 10 |
+
- `AdapterWrappedPolicy`
|
| 11 |
+
|
| 12 |
+
- `models/backbones.py`
|
| 13 |
+
- `NoOpAdapterCompatibleTrunkOutput`
|
| 14 |
+
- `TrunkInterface`
|
| 15 |
+
|
| 16 |
+
- `models/action_decoder.py`
|
| 17 |
+
- `TaskRoutedProposalPrior`
|
| 18 |
+
|
| 19 |
+
- `models/planner.py`
|
| 20 |
+
- `ElasticFeasibilityGate`
|
| 21 |
+
- `ResidualActionReranker`
|
| 22 |
+
- `AdapterPlanner`
|
| 23 |
+
|
| 24 |
+
- `models/world_model.py`
|
| 25 |
+
- `LightweightRevealStateTransitionModel`
|
| 26 |
+
|
| 27 |
+
- `models/observation_memory.py`
|
| 28 |
+
- `RevealStateCache`
|
| 29 |
+
|
| 30 |
+
## Trainer modes
|
| 31 |
+
|
| 32 |
+
`train/trainer.py` now supports:
|
| 33 |
+
|
| 34 |
+
- `policy_type: adapter_wrapped`
|
| 35 |
+
- `policy_type: foundation_trunk`
|
| 36 |
+
|
| 37 |
+
Relevant trainer fields:
|
| 38 |
+
|
| 39 |
+
- `training_regime`
|
| 40 |
+
- `eval_mode`
|
| 41 |
+
- `adapter_mode`
|
| 42 |
+
- `adapter_use_transition_model`
|
| 43 |
+
- `adapter_use_task_conditioning`
|
| 44 |
+
|
| 45 |
+
## Guardrail tests
|
| 46 |
+
|
| 47 |
+
New tests:
|
| 48 |
+
|
| 49 |
+
- `tests/test_trunk_noop_equivalence.py`
|
| 50 |
+
- `tests/test_adapter_gate_blocks_unsafe_retrieve.py`
|
| 51 |
+
- `tests/test_task_specific_loss_masking.py`
|
| 52 |
+
- `tests/test_cloth_specific_metrics_affect_selection.py`
|
| 53 |
+
- `tests/test_general_eval_protocol_is_identical.py`
|
| 54 |
+
|
| 55 |
+
## Config templates
|
| 56 |
+
|
| 57 |
+
- `train/configs/proxy_adapter_wrapped_clip_base.yaml`
|
| 58 |
+
- `train/configs/proxy_adapter_wrapped_clip_rank_only.yaml`
|
| 59 |
+
- `train/configs/proxy_adapter_wrapped_clip_noop_eval.yaml`
|
| 60 |
+
|
| 61 |
+
## Benchmark wrappers
|
| 62 |
+
|
| 63 |
+
- `scripts/run_anchor_adapter_ablations.sh`
|
| 64 |
+
- `scripts/run_proxy_adapter_ablations.sh`
|
| 65 |
+
- `scripts/run_target_like_adapter_subset.sh`
|
| 66 |
+
- `eval/public_benchmark_package.py`
|
| 67 |
+
- `eval/run_public_benchmark_package.py`
|
| 68 |
+
|
| 69 |
+
All new configs and scripts default to `~/workspace` outputs and reports.
|
| 70 |
+
|
| 71 |
+
## Public benchmark package
|
| 72 |
+
|
| 73 |
+
The public benchmark package is the current fair-comparison contract for real benchmarks:
|
| 74 |
+
|
| 75 |
+
- target tracks:
|
| 76 |
+
- `bag_track` -> `BEHAVIOR-1K/unpacking_childs_bag-0`
|
| 77 |
+
- `occlusion_track` -> `ManiSkill/PickClutterYCB-v1`
|
| 78 |
+
- `cloth_track` -> `GarmentLab/grasp_protocol_stacked_garment`
|
| 79 |
+
- anchor track:
|
| 80 |
+
- `anchor_track` -> `AnyBimanual/dual_push_buttons`
|
| 81 |
+
|
| 82 |
+
The package code enforces:
|
| 83 |
+
|
| 84 |
+
- mode-invariant eval protocols per track
|
| 85 |
+
- same-data / same-init fairness for `trunk_only_ft` vs `adapter_active_ft`
|
| 86 |
+
- sign-of-life thresholds on intervention and non-base proposal selection
|
| 87 |
+
- no-regression tolerance on the trusted generic anchor
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/docs/public_benchmark_package.md
ADDED
|
@@ -0,0 +1,102 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Public Benchmark Package
|
| 2 |
+
|
| 3 |
+
This repo now contains the requested hybrid public-benchmark package for the real-sim phase.
|
| 4 |
+
|
| 5 |
+
## Tracks
|
| 6 |
+
|
| 7 |
+
- `rlbench2_put_bottle_in_fridge`
|
| 8 |
+
- suite: `RLBench2`
|
| 9 |
+
- task: `bimanual_put_bottle_in_fridge`
|
| 10 |
+
- `rlbench2_take_out_tray`
|
| 11 |
+
- suite: `RLBench2`
|
| 12 |
+
- task: `bimanual_take_tray_out_of_oven`
|
| 13 |
+
- `rlbench2_take_shoes_out_of_box`
|
| 14 |
+
- suite: `RLBench2`
|
| 15 |
+
- task: `bimanual_take_shoes_out_of_box`
|
| 16 |
+
- `rlbench2_lift_tray`
|
| 17 |
+
- suite: `RLBench2`
|
| 18 |
+
- task: `bimanual_lift_tray`
|
| 19 |
+
- `rlbench2_straighten_rope`
|
| 20 |
+
- suite: `RLBench2`
|
| 21 |
+
- task: `bimanual_straighten_rope`
|
| 22 |
+
- `rlbench2_sweep_to_dustpan`
|
| 23 |
+
- suite: `RLBench2`
|
| 24 |
+
- task: `bimanual_sweep_to_dustpan`
|
| 25 |
+
- `dexgarmentlab_store_tops`
|
| 26 |
+
- suite: `DexGarmentLab`
|
| 27 |
+
- task: `Store Tops`
|
| 28 |
+
- `dexgarmentlab_fold_tops`
|
| 29 |
+
- suite: `DexGarmentLab`
|
| 30 |
+
- task: `Fold Tops`
|
| 31 |
+
- `dexgarmentlab_hang_coat`
|
| 32 |
+
- suite: `DexGarmentLab`
|
| 33 |
+
- task: `Hang Coat`
|
| 34 |
+
|
| 35 |
+
## Enforced fairness
|
| 36 |
+
|
| 37 |
+
- `trunk_only_ft` and `adapter_active_ft` must share:
|
| 38 |
+
- train demos
|
| 39 |
+
- val demos
|
| 40 |
+
- init checkpoint group
|
| 41 |
+
- optimizer
|
| 42 |
+
- LR schedule
|
| 43 |
+
- batch size
|
| 44 |
+
- augmentations
|
| 45 |
+
- early stopping metric
|
| 46 |
+
- max gradient steps
|
| 47 |
+
- unfrozen trunk scope
|
| 48 |
+
- dataset split id
|
| 49 |
+
- all modes on a track must share the same eval protocol signature
|
| 50 |
+
- the current hybrid battery has no dedicated anchor track; `anchor_pass` is vacuously `true`
|
| 51 |
+
|
| 52 |
+
## Normalized result schema
|
| 53 |
+
|
| 54 |
+
Each external benchmark run should be converted to one JSON object with:
|
| 55 |
+
|
| 56 |
+
- `track_id`
|
| 57 |
+
- `adapter_mode`
|
| 58 |
+
- `successes` or `success_rate`
|
| 59 |
+
- `episodes`
|
| 60 |
+
- `seed`
|
| 61 |
+
- `eval_protocol`
|
| 62 |
+
- for target tracks: `train_spec`
|
| 63 |
+
- optional diagnostics:
|
| 64 |
+
- `intervention_rate`
|
| 65 |
+
- `non_base_selection_rate`
|
| 66 |
+
- `steps_to_first_reveal_or_access`
|
| 67 |
+
- `steps_to_retrieve`
|
| 68 |
+
- `disturbance_proxy`
|
| 69 |
+
|
| 70 |
+
## Commands
|
| 71 |
+
|
| 72 |
+
Write the default manifest:
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
python -m eval.run_public_benchmark_package \
|
| 76 |
+
--write-default-manifest /workspace/public_hybrid_benchmark_v1.json
|
| 77 |
+
```
|
| 78 |
+
|
| 79 |
+
Run the full smoke battery:
|
| 80 |
+
|
| 81 |
+
```bash
|
| 82 |
+
python -m eval.run_hybrid_public_benchmark_smoke \
|
| 83 |
+
--output-dir /workspace/reports/public_hybrid_benchmark_smoke_v1
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
Run a suite-specific smoke battery:
|
| 87 |
+
|
| 88 |
+
```bash
|
| 89 |
+
xvfb-run -a -s "-screen 0 1280x1024x24" \
|
| 90 |
+
/workspace/envs/rlbench/bin/python -m eval.run_rlbench_hybrid_smoke
|
| 91 |
+
|
| 92 |
+
/workspace/envs/isaacsim/bin/python -m eval.run_dexgarmentlab_hybrid_smoke
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
Summarize normalized multi-mode results:
|
| 96 |
+
|
| 97 |
+
```bash
|
| 98 |
+
python -m eval.run_public_benchmark_package \
|
| 99 |
+
--result /abs/path/rlbench2_put_bottle_in_fridge_adapter_active_seed17.json \
|
| 100 |
+
--result /abs/path/dexgarmentlab_hang_coat_trunk_seed17.json \
|
| 101 |
+
--output-dir /workspace/reports/public_hybrid_benchmark_v1
|
| 102 |
+
```
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_hybrid_public_benchmark_smoke.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import argparse
|
| 4 |
+
import json
|
| 5 |
+
import os
|
| 6 |
+
import subprocess
|
| 7 |
+
import sys
|
| 8 |
+
from pathlib import Path
|
| 9 |
+
from typing import Any
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
REPO_CODE_ROOT = Path(__file__).resolve().parents[1]
|
| 13 |
+
WORKSPACE_ROOT = Path("/workspace")
|
| 14 |
+
DEFAULT_REPORT_DIR = WORKSPACE_ROOT / "reports" / "public_hybrid_benchmark_smoke_v1"
|
| 15 |
+
DEFAULT_RLBENCH_PYTHON = WORKSPACE_ROOT / "envs" / "rlbench" / "bin" / "python"
|
| 16 |
+
DEFAULT_DEX_PYTHON = WORKSPACE_ROOT / "envs" / "isaacsim" / "bin" / "python"
|
| 17 |
+
DEFAULT_COPPELIASIM_ROOT = WORKSPACE_ROOT / "assets" / "coppeliasim_v4_1_0"
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _run(command: list[str], *, env: dict[str, str]) -> subprocess.CompletedProcess[str]:
|
| 21 |
+
return subprocess.run(command, capture_output=True, text=True, check=False, env=env)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _summary_path(suite_output_dir: Path, filename: str) -> Path:
|
| 25 |
+
return suite_output_dir / filename
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def _load_json(path: Path) -> dict[str, Any]:
|
| 29 |
+
with path.open("r", encoding="utf-8") as handle:
|
| 30 |
+
return json.load(handle)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _parse_args() -> argparse.Namespace:
|
| 34 |
+
parser = argparse.ArgumentParser(description="Run the full hybrid public benchmark smoke battery.")
|
| 35 |
+
parser.add_argument("--output-dir", type=Path, default=DEFAULT_REPORT_DIR)
|
| 36 |
+
parser.add_argument("--adapter-mode", type=str, default="trunk_only_ft")
|
| 37 |
+
parser.add_argument("--seed", type=int, default=17)
|
| 38 |
+
parser.add_argument("--rlbench-python", type=Path, default=DEFAULT_RLBENCH_PYTHON)
|
| 39 |
+
parser.add_argument("--dex-python", type=Path, default=DEFAULT_DEX_PYTHON)
|
| 40 |
+
parser.add_argument("--skip-rlbench", action="store_true")
|
| 41 |
+
parser.add_argument("--skip-dex", action="store_true")
|
| 42 |
+
return parser.parse_args()
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def main() -> None:
|
| 46 |
+
args = _parse_args()
|
| 47 |
+
args.output_dir.mkdir(parents=True, exist_ok=True)
|
| 48 |
+
|
| 49 |
+
env = os.environ.copy()
|
| 50 |
+
env["PYTHONPATH"] = f"{REPO_CODE_ROOT}:{env.get('PYTHONPATH', '')}".rstrip(":")
|
| 51 |
+
|
| 52 |
+
suite_summaries: dict[str, Any] = {}
|
| 53 |
+
failures: list[str] = []
|
| 54 |
+
|
| 55 |
+
if not args.skip_rlbench:
|
| 56 |
+
rlbench_out = args.output_dir / "rlbench2"
|
| 57 |
+
rlbench_out.mkdir(parents=True, exist_ok=True)
|
| 58 |
+
rlbench_env = dict(env)
|
| 59 |
+
rlbench_env["COPPELIASIM_ROOT"] = rlbench_env.get("COPPELIASIM_ROOT", str(DEFAULT_COPPELIASIM_ROOT))
|
| 60 |
+
rlbench_env["LD_LIBRARY_PATH"] = (
|
| 61 |
+
f"{rlbench_env['COPPELIASIM_ROOT']}:{rlbench_env.get('LD_LIBRARY_PATH', '')}"
|
| 62 |
+
).rstrip(":")
|
| 63 |
+
rlbench_env["QT_QPA_PLATFORM_PLUGIN_PATH"] = rlbench_env.get(
|
| 64 |
+
"QT_QPA_PLATFORM_PLUGIN_PATH",
|
| 65 |
+
rlbench_env["COPPELIASIM_ROOT"],
|
| 66 |
+
)
|
| 67 |
+
rlbench_env["QT_PLUGIN_PATH"] = rlbench_env.get("QT_PLUGIN_PATH", rlbench_env["COPPELIASIM_ROOT"])
|
| 68 |
+
rlbench_env["XDG_RUNTIME_DIR"] = rlbench_env.get("XDG_RUNTIME_DIR", "/tmp/runtime-root")
|
| 69 |
+
rlbench_proc = _run(
|
| 70 |
+
[
|
| 71 |
+
"xvfb-run",
|
| 72 |
+
"-a",
|
| 73 |
+
"-s",
|
| 74 |
+
"-screen 0 1280x1024x24",
|
| 75 |
+
str(args.rlbench_python),
|
| 76 |
+
"-m",
|
| 77 |
+
"eval.run_rlbench_hybrid_smoke",
|
| 78 |
+
"--output-dir",
|
| 79 |
+
str(rlbench_out),
|
| 80 |
+
"--adapter-mode",
|
| 81 |
+
args.adapter_mode,
|
| 82 |
+
"--seed",
|
| 83 |
+
str(args.seed),
|
| 84 |
+
],
|
| 85 |
+
env=rlbench_env,
|
| 86 |
+
)
|
| 87 |
+
if rlbench_proc.returncode != 0:
|
| 88 |
+
failures.append("rlbench2")
|
| 89 |
+
suite_summaries["rlbench2"] = {
|
| 90 |
+
"returncode": rlbench_proc.returncode,
|
| 91 |
+
"stdout_tail": rlbench_proc.stdout.splitlines()[-20:],
|
| 92 |
+
"stderr_tail": rlbench_proc.stderr.splitlines()[-20:],
|
| 93 |
+
"summary_path": str(_summary_path(rlbench_out, "rlbench_hybrid_smoke_summary.json")),
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
if not args.skip_dex:
|
| 97 |
+
dex_out = args.output_dir / "dexgarmentlab"
|
| 98 |
+
dex_out.mkdir(parents=True, exist_ok=True)
|
| 99 |
+
dex_proc = _run(
|
| 100 |
+
[
|
| 101 |
+
str(args.dex_python),
|
| 102 |
+
"-m",
|
| 103 |
+
"eval.run_dexgarmentlab_hybrid_smoke",
|
| 104 |
+
"--output-dir",
|
| 105 |
+
str(dex_out),
|
| 106 |
+
"--adapter-mode",
|
| 107 |
+
args.adapter_mode,
|
| 108 |
+
"--seed",
|
| 109 |
+
str(args.seed),
|
| 110 |
+
],
|
| 111 |
+
env=env,
|
| 112 |
+
)
|
| 113 |
+
if dex_proc.returncode != 0:
|
| 114 |
+
failures.append("dexgarmentlab")
|
| 115 |
+
suite_summaries["dexgarmentlab"] = {
|
| 116 |
+
"returncode": dex_proc.returncode,
|
| 117 |
+
"stdout_tail": dex_proc.stdout.splitlines()[-20:],
|
| 118 |
+
"stderr_tail": dex_proc.stderr.splitlines()[-20:],
|
| 119 |
+
"summary_path": str(_summary_path(dex_out, "dexgarmentlab_hybrid_smoke_summary.json")),
|
| 120 |
+
}
|
| 121 |
+
|
| 122 |
+
summary_payload = {
|
| 123 |
+
"adapter_mode": args.adapter_mode,
|
| 124 |
+
"seed": args.seed,
|
| 125 |
+
"suite_summaries": suite_summaries,
|
| 126 |
+
"failures": failures,
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
for suite_name, payload in list(suite_summaries.items()):
|
| 130 |
+
summary_path = Path(payload["summary_path"])
|
| 131 |
+
if summary_path.exists():
|
| 132 |
+
summary_payload["suite_summaries"][suite_name]["summary"] = _load_json(summary_path)
|
| 133 |
+
|
| 134 |
+
output_path = args.output_dir / "hybrid_public_benchmark_smoke_summary.json"
|
| 135 |
+
output_path.write_text(json.dumps(summary_payload, indent=2, sort_keys=True) + "\n", encoding="utf-8")
|
| 136 |
+
print(json.dumps({"summary_path": str(output_path), "failures": failures}, indent=2))
|
| 137 |
+
if failures:
|
| 138 |
+
raise SystemExit(1)
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
if __name__ == "__main__":
|
| 142 |
+
main()
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py
ADDED
|
@@ -0,0 +1,887 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from torch import Tensor, nn
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
@dataclass
|
| 10 |
+
class PlannerConfig:
|
| 11 |
+
hidden_dim: int = 512
|
| 12 |
+
num_candidates: int = 8
|
| 13 |
+
action_dim: int = 14
|
| 14 |
+
num_support_modes: int = 3
|
| 15 |
+
utility_margin: float = 0.1
|
| 16 |
+
corridor_weight: float = 1.0
|
| 17 |
+
persistence_weight: float = 0.5
|
| 18 |
+
proposal_weight: float = 0.5
|
| 19 |
+
task_progress_weight: float = 0.75
|
| 20 |
+
disturbance_weight: float = 0.75
|
| 21 |
+
reocclusion_weight: float = 0.5
|
| 22 |
+
visibility_weight: float = 0.25
|
| 23 |
+
num_heads: int = 4
|
| 24 |
+
num_layers: int = 2
|
| 25 |
+
num_phases: int = 5
|
| 26 |
+
num_arm_roles: int = 4
|
| 27 |
+
top_k: int = 4
|
| 28 |
+
belief_gain_weight: float = 1.0
|
| 29 |
+
visibility_gain_weight: float = 0.75
|
| 30 |
+
clearance_weight: float = 0.75
|
| 31 |
+
occluder_contact_weight: float = 0.5
|
| 32 |
+
grasp_affordance_weight: float = 0.75
|
| 33 |
+
support_stability_weight: float = 0.5
|
| 34 |
+
residual_weight: float = 0.5
|
| 35 |
+
retrieve_access_threshold: float = 0.15
|
| 36 |
+
retrieve_persistence_threshold: float = 0.15
|
| 37 |
+
retrieve_support_threshold: float = 0.25
|
| 38 |
+
retrieve_reocclusion_threshold: float = 0.6
|
| 39 |
+
adapter_confidence_threshold: float = 0.55
|
| 40 |
+
mode_preference_bonus: float = 3.0
|
| 41 |
+
premature_retrieve_penalty: float = 1.5
|
| 42 |
+
premature_insert_penalty: float = 0.75
|
| 43 |
+
premature_occlusion_sweep_penalty: float = 0.75
|
| 44 |
+
premature_maintain_penalty: float = 0.0
|
| 45 |
+
retrieve_stage_access_threshold: float = 0.45
|
| 46 |
+
retrieve_stage_reveal_threshold: float = 0.40
|
| 47 |
+
retrieve_stage_persistence_threshold: float = 0.20
|
| 48 |
+
retrieve_stage_support_threshold: float = 0.25
|
| 49 |
+
insert_stage_access_threshold: float = 0.40
|
| 50 |
+
insert_stage_visibility_threshold: float = 0.30
|
| 51 |
+
insert_stage_support_threshold: float = 0.25
|
| 52 |
+
occlusion_maintain_gap_min_access: float = 0.0
|
| 53 |
+
occlusion_maintain_gap_min_visibility: float = 0.0
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class RevealPlanner(nn.Module):
|
| 57 |
+
def __init__(self, config: PlannerConfig) -> None:
|
| 58 |
+
super().__init__()
|
| 59 |
+
self.config = config
|
| 60 |
+
summary_dim = (
|
| 61 |
+
config.action_dim * 2
|
| 62 |
+
+ 3
|
| 63 |
+
+ 3
|
| 64 |
+
+ 1
|
| 65 |
+
+ 3
|
| 66 |
+
+ 1
|
| 67 |
+
)
|
| 68 |
+
self.trunk = nn.Sequential(
|
| 69 |
+
nn.LayerNorm(summary_dim),
|
| 70 |
+
nn.Linear(summary_dim, config.hidden_dim),
|
| 71 |
+
nn.GELU(),
|
| 72 |
+
nn.Linear(config.hidden_dim, config.hidden_dim),
|
| 73 |
+
nn.GELU(),
|
| 74 |
+
)
|
| 75 |
+
self.success_head = nn.Linear(config.hidden_dim, 1)
|
| 76 |
+
self.risk_head = nn.Linear(config.hidden_dim, 1)
|
| 77 |
+
|
| 78 |
+
def summarize_candidates(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> Tensor:
|
| 79 |
+
candidate_mean = candidate_chunks.mean(dim=2)
|
| 80 |
+
candidate_terminal = candidate_chunks[:, :, -1]
|
| 81 |
+
corridor_prob = rollout_state["corridor_logits"].sigmoid().amax(dim=-1).mean(dim=-2)
|
| 82 |
+
persistence = rollout_state["persistence_horizon"].mean(dim=-2)
|
| 83 |
+
disturbance = rollout_state["disturbance_cost"].mean(dim=-1, keepdim=True)
|
| 84 |
+
reocclusion = rollout_state["reocclusion_logit"].sigmoid().mean(dim=-2)
|
| 85 |
+
uncertainty = rollout_state["uncertainty"].mean(dim=-1, keepdim=True)
|
| 86 |
+
return torch.cat(
|
| 87 |
+
[
|
| 88 |
+
candidate_mean,
|
| 89 |
+
candidate_terminal,
|
| 90 |
+
corridor_prob,
|
| 91 |
+
persistence,
|
| 92 |
+
disturbance,
|
| 93 |
+
reocclusion,
|
| 94 |
+
uncertainty,
|
| 95 |
+
],
|
| 96 |
+
dim=-1,
|
| 97 |
+
)
|
| 98 |
+
|
| 99 |
+
def score_rollouts(self, rollout_state: dict[str, Tensor], candidate_chunks: Tensor) -> dict[str, Tensor]:
|
| 100 |
+
features = self.summarize_candidates(candidate_chunks, rollout_state)
|
| 101 |
+
hidden = self.trunk(features)
|
| 102 |
+
success_logits = self.success_head(hidden).squeeze(-1)
|
| 103 |
+
risk_values = torch.sigmoid(self.risk_head(hidden)).squeeze(-1)
|
| 104 |
+
utility_scores = success_logits.sigmoid() - risk_values
|
| 105 |
+
return {
|
| 106 |
+
"planner_features": features,
|
| 107 |
+
"planner_hidden": hidden,
|
| 108 |
+
"success_logits": success_logits,
|
| 109 |
+
"risk_values": risk_values,
|
| 110 |
+
"utility_scores": utility_scores,
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
def select_best(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> dict[str, Tensor]:
|
| 114 |
+
outputs = self.score_rollouts(rollout_state=rollout_state, candidate_chunks=candidate_chunks)
|
| 115 |
+
best_idx = outputs["utility_scores"].argmax(dim=-1)
|
| 116 |
+
batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
|
| 117 |
+
return {
|
| 118 |
+
**outputs,
|
| 119 |
+
"best_indices": best_idx,
|
| 120 |
+
"best_chunk": candidate_chunks[batch_indices, best_idx],
|
| 121 |
+
}
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
class InteractionPlanner(nn.Module):
|
| 125 |
+
def __init__(self, config: PlannerConfig) -> None:
|
| 126 |
+
super().__init__()
|
| 127 |
+
self.config = config
|
| 128 |
+
step_dim = (
|
| 129 |
+
config.action_dim
|
| 130 |
+
+ config.num_phases
|
| 131 |
+
+ (2 * config.num_arm_roles)
|
| 132 |
+
+ config.num_support_modes
|
| 133 |
+
+ 7
|
| 134 |
+
)
|
| 135 |
+
self.step_proj = nn.Sequential(
|
| 136 |
+
nn.LayerNorm(step_dim),
|
| 137 |
+
nn.Linear(step_dim, config.hidden_dim),
|
| 138 |
+
nn.GELU(),
|
| 139 |
+
)
|
| 140 |
+
encoder_layer = nn.TransformerEncoderLayer(
|
| 141 |
+
d_model=config.hidden_dim,
|
| 142 |
+
nhead=config.num_heads,
|
| 143 |
+
dim_feedforward=config.hidden_dim * 4,
|
| 144 |
+
batch_first=True,
|
| 145 |
+
norm_first=True,
|
| 146 |
+
)
|
| 147 |
+
self.sequence_encoder = nn.TransformerEncoder(encoder_layer, num_layers=config.num_layers)
|
| 148 |
+
self.cls_token = nn.Parameter(torch.randn(1, 1, config.hidden_dim) * 0.02)
|
| 149 |
+
self.success_head = nn.Linear(config.hidden_dim, 1)
|
| 150 |
+
self.risk_head = nn.Linear(config.hidden_dim, 1)
|
| 151 |
+
self.score_head = nn.Linear(config.hidden_dim, 1)
|
| 152 |
+
|
| 153 |
+
def _mean_field(self, tensor: Tensor) -> Tensor:
|
| 154 |
+
return tensor.mean(dim=(-1, -2))
|
| 155 |
+
|
| 156 |
+
def summarize_trajectory(self, candidate_chunks: Tensor, rollout_state: dict[str, Tensor]) -> Tensor:
|
| 157 |
+
horizon = min(candidate_chunks.shape[2], rollout_state["phase_logits"].shape[2])
|
| 158 |
+
candidate_steps = candidate_chunks[:, :, :horizon]
|
| 159 |
+
phase_probs = rollout_state["phase_logits"][:, :, :horizon].softmax(dim=-1)
|
| 160 |
+
support_probs = rollout_state["support_mode_logits"][:, :, :horizon].softmax(dim=-1)
|
| 161 |
+
arm_role_probs = rollout_state["arm_role_logits"][:, :, :horizon].softmax(dim=-1).flatten(start_dim=-2)
|
| 162 |
+
target_mean = self._mean_field(rollout_state["target_field"][:, :, :horizon].sigmoid())
|
| 163 |
+
feasibility_mean = self._mean_field(rollout_state["actor_feasibility_field"][:, :, :horizon].sigmoid())
|
| 164 |
+
persistence_mean = self._mean_field(rollout_state["persistence_field"][:, :, :horizon])
|
| 165 |
+
risk_mean = self._mean_field(rollout_state["risk_field"][:, :, :horizon])
|
| 166 |
+
uncertainty_mean = self._mean_field(rollout_state["uncertainty_field"][:, :, :horizon])
|
| 167 |
+
role_gap = (
|
| 168 |
+
rollout_state["arm_role_logits"][:, :, :horizon, 0].softmax(dim=-1)
|
| 169 |
+
- rollout_state["arm_role_logits"][:, :, :horizon, 1].softmax(dim=-1)
|
| 170 |
+
).abs().mean(dim=-1, keepdim=True)
|
| 171 |
+
return torch.cat(
|
| 172 |
+
[
|
| 173 |
+
candidate_steps,
|
| 174 |
+
phase_probs,
|
| 175 |
+
arm_role_probs,
|
| 176 |
+
support_probs,
|
| 177 |
+
target_mean,
|
| 178 |
+
feasibility_mean,
|
| 179 |
+
persistence_mean,
|
| 180 |
+
risk_mean,
|
| 181 |
+
uncertainty_mean,
|
| 182 |
+
role_gap,
|
| 183 |
+
],
|
| 184 |
+
dim=-1,
|
| 185 |
+
)
|
| 186 |
+
|
| 187 |
+
def score_rollouts(
|
| 188 |
+
self,
|
| 189 |
+
rollout_state: dict[str, Tensor],
|
| 190 |
+
candidate_chunks: Tensor,
|
| 191 |
+
proposal_logits: Tensor | None = None,
|
| 192 |
+
) -> dict[str, Tensor]:
|
| 193 |
+
features = self.summarize_trajectory(candidate_chunks, rollout_state)
|
| 194 |
+
batch_size, num_candidates, horizon, _ = features.shape
|
| 195 |
+
flat_features = features.view(batch_size * num_candidates, horizon, -1)
|
| 196 |
+
hidden_steps = self.step_proj(flat_features)
|
| 197 |
+
cls = self.cls_token.expand(batch_size * num_candidates, -1, -1)
|
| 198 |
+
encoded = self.sequence_encoder(torch.cat([cls, hidden_steps], dim=1))
|
| 199 |
+
pooled = encoded[:, 0]
|
| 200 |
+
success_logits = self.success_head(pooled).view(batch_size, num_candidates).squeeze(-1)
|
| 201 |
+
risk_values = torch.sigmoid(self.risk_head(pooled)).view(batch_size, num_candidates).squeeze(-1)
|
| 202 |
+
utility_scores = self.score_head(pooled).view(batch_size, num_candidates).squeeze(-1)
|
| 203 |
+
utility_scores = utility_scores + success_logits.sigmoid() - risk_values
|
| 204 |
+
if proposal_logits is not None and proposal_logits.shape == utility_scores.shape:
|
| 205 |
+
utility_scores = utility_scores + self.config.proposal_weight * proposal_logits.sigmoid()
|
| 206 |
+
return {
|
| 207 |
+
"planner_features": features.mean(dim=2),
|
| 208 |
+
"planner_hidden": pooled.view(batch_size, num_candidates, -1),
|
| 209 |
+
"success_logits": success_logits,
|
| 210 |
+
"risk_values": risk_values,
|
| 211 |
+
"utility_scores": utility_scores,
|
| 212 |
+
}
|
| 213 |
+
|
| 214 |
+
def select_best(
|
| 215 |
+
self,
|
| 216 |
+
candidate_chunks: Tensor,
|
| 217 |
+
rollout_state: dict[str, Tensor],
|
| 218 |
+
proposal_logits: Tensor | None = None,
|
| 219 |
+
) -> dict[str, Tensor]:
|
| 220 |
+
outputs = self.score_rollouts(
|
| 221 |
+
rollout_state=rollout_state,
|
| 222 |
+
candidate_chunks=candidate_chunks,
|
| 223 |
+
proposal_logits=proposal_logits,
|
| 224 |
+
)
|
| 225 |
+
best_idx = outputs["utility_scores"].argmax(dim=-1)
|
| 226 |
+
batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
|
| 227 |
+
return {
|
| 228 |
+
**outputs,
|
| 229 |
+
"best_indices": best_idx,
|
| 230 |
+
"best_chunk": candidate_chunks[batch_indices, best_idx],
|
| 231 |
+
}
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
class StructuredElasticUtility(nn.Module):
|
| 235 |
+
def __init__(self, config: PlannerConfig) -> None:
|
| 236 |
+
super().__init__()
|
| 237 |
+
self.config = config
|
| 238 |
+
|
| 239 |
+
def _field_mean(self, tensor: Tensor) -> Tensor:
|
| 240 |
+
if tensor.ndim == 6:
|
| 241 |
+
return tensor.mean(dim=(-1, -2, -3))
|
| 242 |
+
if tensor.ndim == 5:
|
| 243 |
+
return tensor.mean(dim=(-1, -2))
|
| 244 |
+
if tensor.ndim == 4:
|
| 245 |
+
return tensor.mean(dim=(-1, -2))
|
| 246 |
+
return tensor
|
| 247 |
+
|
| 248 |
+
def _initial_scalar(self, state: dict[str, Tensor], key: str) -> Tensor:
|
| 249 |
+
value = state[key]
|
| 250 |
+
if value.ndim >= 4:
|
| 251 |
+
return value.mean(dim=tuple(range(1, value.ndim)))
|
| 252 |
+
if value.ndim == 3:
|
| 253 |
+
return value.mean(dim=(-1, -2))
|
| 254 |
+
if value.ndim == 2:
|
| 255 |
+
return value.mean(dim=-1)
|
| 256 |
+
return value
|
| 257 |
+
|
| 258 |
+
def forward(
|
| 259 |
+
self,
|
| 260 |
+
initial_state: dict[str, Tensor],
|
| 261 |
+
rollout_state: dict[str, Tensor],
|
| 262 |
+
candidate_chunks: Tensor,
|
| 263 |
+
) -> dict[str, Tensor]:
|
| 264 |
+
initial_belief = self._initial_scalar(initial_state, "target_belief_field").unsqueeze(1)
|
| 265 |
+
initial_visibility = self._initial_scalar(initial_state, "visibility_field").unsqueeze(1)
|
| 266 |
+
belief_future = self._field_mean(rollout_state["target_belief_field"]).mean(dim=-1)
|
| 267 |
+
visibility_future = self._field_mean(rollout_state["visibility_field"]).mean(dim=-1)
|
| 268 |
+
clearance = self._field_mean(rollout_state["clearance_field"]).mean(dim=-1)
|
| 269 |
+
occluder_contact = self._field_mean(rollout_state["occluder_contact_field"]).mean(dim=-1)
|
| 270 |
+
grasp_affordance = self._field_mean(rollout_state["grasp_affordance_field"]).mean(dim=-1)
|
| 271 |
+
support_stability = torch.sigmoid(self._field_mean(rollout_state["support_stability_field"])).mean(dim=-1)
|
| 272 |
+
persistence_traj = self._field_mean(rollout_state["persistence_field"])
|
| 273 |
+
reocclusion_traj = self._field_mean(rollout_state["reocclusion_field"])
|
| 274 |
+
disturbance_traj = self._field_mean(rollout_state["disturbance_field"])
|
| 275 |
+
access_traj = torch.sigmoid(self._field_mean(rollout_state["access_field"]))
|
| 276 |
+
persistence = persistence_traj.mean(dim=-1)
|
| 277 |
+
reocclusion = reocclusion_traj.mean(dim=-1)
|
| 278 |
+
disturbance = disturbance_traj.mean(dim=-1)
|
| 279 |
+
access_quality = access_traj.mean(dim=-1)
|
| 280 |
+
access_floor = access_traj.amin(dim=-1)
|
| 281 |
+
persistence_floor = persistence_traj.amin(dim=-1)
|
| 282 |
+
support_floor = torch.sigmoid(self._field_mean(rollout_state["support_stability_field"])).amin(dim=-1)
|
| 283 |
+
reocclusion_worst = reocclusion_traj.amax(dim=-1)
|
| 284 |
+
retrieve_progress = torch.sigmoid(candidate_chunks[:, :, :, -1]).mean(dim=-1)
|
| 285 |
+
utility = (
|
| 286 |
+
self.config.belief_gain_weight * (belief_future - initial_belief)
|
| 287 |
+
+ self.config.visibility_gain_weight * (visibility_future - initial_visibility)
|
| 288 |
+
+ self.config.clearance_weight * clearance
|
| 289 |
+
+ self.config.occluder_contact_weight * occluder_contact
|
| 290 |
+
+ self.config.grasp_affordance_weight * grasp_affordance
|
| 291 |
+
+ self.config.persistence_weight * persistence
|
| 292 |
+
+ self.config.support_stability_weight * support_stability
|
| 293 |
+
+ self.config.corridor_weight * access_quality
|
| 294 |
+
+ self.config.task_progress_weight * retrieve_progress
|
| 295 |
+
- self.config.reocclusion_weight * reocclusion
|
| 296 |
+
- self.config.disturbance_weight * disturbance
|
| 297 |
+
- self.config.visibility_weight * (1.0 - visibility_future)
|
| 298 |
+
)
|
| 299 |
+
return {
|
| 300 |
+
"belief_gain": belief_future - initial_belief,
|
| 301 |
+
"visibility_gain": visibility_future - initial_visibility,
|
| 302 |
+
"clearance": clearance,
|
| 303 |
+
"occluder_contact_quality": occluder_contact,
|
| 304 |
+
"grasp_affordance": grasp_affordance,
|
| 305 |
+
"persistence": persistence,
|
| 306 |
+
"support_stability": support_stability,
|
| 307 |
+
"reocclusion_penalty": reocclusion,
|
| 308 |
+
"reocclusion_worst": reocclusion_worst,
|
| 309 |
+
"disturbance_penalty": disturbance,
|
| 310 |
+
"access_quality": access_quality,
|
| 311 |
+
"access_floor": access_floor,
|
| 312 |
+
"persistence_floor": persistence_floor,
|
| 313 |
+
"support_floor": support_floor,
|
| 314 |
+
"task_progress": retrieve_progress,
|
| 315 |
+
"utility_structured": utility,
|
| 316 |
+
}
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
class ResidualPlannerScorer(nn.Module):
|
| 320 |
+
def __init__(self, config: PlannerConfig) -> None:
|
| 321 |
+
super().__init__()
|
| 322 |
+
feature_dim = (config.action_dim * 2) + 11
|
| 323 |
+
self.trunk = nn.Sequential(
|
| 324 |
+
nn.LayerNorm(feature_dim),
|
| 325 |
+
nn.Linear(feature_dim, config.hidden_dim),
|
| 326 |
+
nn.GELU(),
|
| 327 |
+
nn.Linear(config.hidden_dim, config.hidden_dim),
|
| 328 |
+
nn.GELU(),
|
| 329 |
+
)
|
| 330 |
+
self.success_head = nn.Linear(config.hidden_dim, 1)
|
| 331 |
+
self.risk_head = nn.Linear(config.hidden_dim, 1)
|
| 332 |
+
self.residual_head = nn.Linear(config.hidden_dim, 1)
|
| 333 |
+
|
| 334 |
+
def forward(
|
| 335 |
+
self,
|
| 336 |
+
candidate_chunks: Tensor,
|
| 337 |
+
structured: dict[str, Tensor],
|
| 338 |
+
proposal_logits: Tensor | None = None,
|
| 339 |
+
) -> dict[str, Tensor]:
|
| 340 |
+
candidate_mean = candidate_chunks.mean(dim=2)
|
| 341 |
+
candidate_terminal = candidate_chunks[:, :, -1]
|
| 342 |
+
components = torch.stack(
|
| 343 |
+
[
|
| 344 |
+
structured["belief_gain"],
|
| 345 |
+
structured["visibility_gain"],
|
| 346 |
+
structured["clearance"],
|
| 347 |
+
structured["occluder_contact_quality"],
|
| 348 |
+
structured["grasp_affordance"],
|
| 349 |
+
structured["persistence"],
|
| 350 |
+
structured["support_stability"],
|
| 351 |
+
structured["reocclusion_penalty"],
|
| 352 |
+
structured["disturbance_penalty"],
|
| 353 |
+
structured["access_quality"],
|
| 354 |
+
structured["task_progress"],
|
| 355 |
+
],
|
| 356 |
+
dim=-1,
|
| 357 |
+
)
|
| 358 |
+
features = torch.cat([candidate_mean, candidate_terminal, components], dim=-1)
|
| 359 |
+
hidden = self.trunk(features)
|
| 360 |
+
success_logits = self.success_head(hidden).squeeze(-1)
|
| 361 |
+
risk_values = torch.sigmoid(self.risk_head(hidden)).squeeze(-1)
|
| 362 |
+
residual = self.residual_head(hidden).squeeze(-1)
|
| 363 |
+
if proposal_logits is not None and proposal_logits.shape == residual.shape:
|
| 364 |
+
residual = residual + 0.25 * proposal_logits.sigmoid()
|
| 365 |
+
return {
|
| 366 |
+
"planner_hidden": hidden,
|
| 367 |
+
"success_logits": success_logits,
|
| 368 |
+
"risk_values": risk_values,
|
| 369 |
+
"utility_residual": residual,
|
| 370 |
+
}
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
class CascadePlanner(nn.Module):
|
| 374 |
+
def __init__(self, config: PlannerConfig) -> None:
|
| 375 |
+
super().__init__()
|
| 376 |
+
self.config = config
|
| 377 |
+
self.structured = StructuredElasticUtility(config)
|
| 378 |
+
self.residual = ResidualPlannerScorer(config)
|
| 379 |
+
|
| 380 |
+
def shortlist(
|
| 381 |
+
self,
|
| 382 |
+
proposal_logits: Tensor | None,
|
| 383 |
+
candidate_chunks: Tensor,
|
| 384 |
+
proposal_mode_assignments: Tensor | None = None,
|
| 385 |
+
) -> Tensor:
|
| 386 |
+
batch_size, num_candidates = candidate_chunks.shape[:2]
|
| 387 |
+
top_k = min(max(1, self.config.top_k), num_candidates)
|
| 388 |
+
if proposal_logits is None:
|
| 389 |
+
cheap_scores = -candidate_chunks.square().mean(dim=(-1, -2))
|
| 390 |
+
else:
|
| 391 |
+
cheap_scores = proposal_logits
|
| 392 |
+
if proposal_mode_assignments is None:
|
| 393 |
+
return cheap_scores.topk(top_k, dim=-1).indices
|
| 394 |
+
if proposal_mode_assignments.ndim == 1:
|
| 395 |
+
proposal_mode_assignments = proposal_mode_assignments.unsqueeze(0).expand(batch_size, -1)
|
| 396 |
+
|
| 397 |
+
shortlisted = []
|
| 398 |
+
for batch_idx in range(batch_size):
|
| 399 |
+
scores = cheap_scores[batch_idx]
|
| 400 |
+
mode_ids = proposal_mode_assignments[batch_idx]
|
| 401 |
+
mode_best: list[tuple[float, int]] = []
|
| 402 |
+
for mode_id in torch.unique(mode_ids):
|
| 403 |
+
mode_indices = torch.nonzero(mode_ids == mode_id, as_tuple=False).squeeze(-1)
|
| 404 |
+
best_local = mode_indices[scores[mode_indices].argmax()]
|
| 405 |
+
mode_best.append((float(scores[best_local].detach()), int(best_local)))
|
| 406 |
+
mode_best.sort(key=lambda item: item[0], reverse=True)
|
| 407 |
+
chosen = [index for _, index in mode_best[:top_k]]
|
| 408 |
+
if len(chosen) < top_k:
|
| 409 |
+
for candidate_idx in scores.argsort(descending=True).tolist():
|
| 410 |
+
if candidate_idx not in chosen:
|
| 411 |
+
chosen.append(candidate_idx)
|
| 412 |
+
if len(chosen) >= top_k:
|
| 413 |
+
break
|
| 414 |
+
shortlisted.append(torch.as_tensor(chosen[:top_k], device=candidate_chunks.device, dtype=torch.long))
|
| 415 |
+
return torch.stack(shortlisted, dim=0)
|
| 416 |
+
|
| 417 |
+
def select_best(
|
| 418 |
+
self,
|
| 419 |
+
initial_state: dict[str, Tensor],
|
| 420 |
+
candidate_chunks: Tensor,
|
| 421 |
+
rollout_state: dict[str, Tensor],
|
| 422 |
+
proposal_logits: Tensor | None = None,
|
| 423 |
+
candidate_indices: Tensor | None = None,
|
| 424 |
+
proposal_mode_names: list[list[str]] | None = None,
|
| 425 |
+
) -> dict[str, Tensor]:
|
| 426 |
+
structured = self.structured(
|
| 427 |
+
initial_state=initial_state,
|
| 428 |
+
rollout_state=rollout_state,
|
| 429 |
+
candidate_chunks=candidate_chunks,
|
| 430 |
+
)
|
| 431 |
+
residual = self.residual(
|
| 432 |
+
candidate_chunks=candidate_chunks,
|
| 433 |
+
structured=structured,
|
| 434 |
+
proposal_logits=proposal_logits,
|
| 435 |
+
)
|
| 436 |
+
utility_total = structured["utility_structured"] + self.config.residual_weight * residual["utility_residual"]
|
| 437 |
+
utility_total = utility_total + residual["success_logits"].sigmoid() - residual["risk_values"]
|
| 438 |
+
feasibility_penalty = torch.zeros_like(utility_total)
|
| 439 |
+
if proposal_mode_names is not None:
|
| 440 |
+
retrieve_like = torch.zeros_like(utility_total, dtype=torch.bool)
|
| 441 |
+
for batch_idx, names in enumerate(proposal_mode_names):
|
| 442 |
+
for candidate_idx, name in enumerate(names[: utility_total.shape[1]]):
|
| 443 |
+
retrieve_like[batch_idx, candidate_idx] = any(
|
| 444 |
+
token in name for token in ("retrieve", "insert_actor", "probe_inside")
|
| 445 |
+
)
|
| 446 |
+
blocked = (
|
| 447 |
+
(structured["access_floor"] < 0.15)
|
| 448 |
+
| (structured["persistence_floor"] < 0.15)
|
| 449 |
+
| (structured["support_floor"] < 0.25)
|
| 450 |
+
| (structured["reocclusion_worst"] > 0.6)
|
| 451 |
+
)
|
| 452 |
+
feasibility_penalty = retrieve_like.to(dtype=utility_total.dtype) * blocked.to(dtype=utility_total.dtype) * 2.0
|
| 453 |
+
utility_total = utility_total - feasibility_penalty
|
| 454 |
+
best_local = utility_total.argmax(dim=-1)
|
| 455 |
+
batch_indices = torch.arange(candidate_chunks.shape[0], device=candidate_chunks.device)
|
| 456 |
+
if candidate_indices is None:
|
| 457 |
+
best_indices = best_local
|
| 458 |
+
else:
|
| 459 |
+
best_indices = candidate_indices[batch_indices, best_local]
|
| 460 |
+
return {
|
| 461 |
+
**structured,
|
| 462 |
+
**residual,
|
| 463 |
+
"utility_total": utility_total,
|
| 464 |
+
"utility_scores": utility_total,
|
| 465 |
+
"feasibility_penalty": feasibility_penalty,
|
| 466 |
+
"best_indices": best_indices,
|
| 467 |
+
"best_chunk": candidate_chunks[batch_indices, best_local],
|
| 468 |
+
"ranking_diagnostics": {
|
| 469 |
+
"topk_indices": candidate_indices if candidate_indices is not None else best_local.unsqueeze(-1),
|
| 470 |
+
"best_local_indices": best_local,
|
| 471 |
+
},
|
| 472 |
+
}
|
| 473 |
+
|
| 474 |
+
|
| 475 |
+
def _summary_scalar(state: dict[str, Tensor], key: str, fallback_keys: tuple[str, ...] = ()) -> Tensor:
|
| 476 |
+
for candidate in (key, *fallback_keys):
|
| 477 |
+
value = state.get(candidate)
|
| 478 |
+
if value is None:
|
| 479 |
+
continue
|
| 480 |
+
if value.ndim >= 5:
|
| 481 |
+
return value.mean(dim=tuple(range(value.ndim - 2, value.ndim))).mean(dim=-1)
|
| 482 |
+
if value.ndim == 4:
|
| 483 |
+
return value.mean(dim=(-1, -2))
|
| 484 |
+
if value.ndim == 3:
|
| 485 |
+
return value
|
| 486 |
+
if value.ndim == 2:
|
| 487 |
+
return value
|
| 488 |
+
return value.unsqueeze(-1)
|
| 489 |
+
raise KeyError(f"Missing summary key {key} and fallbacks {fallback_keys}.")
|
| 490 |
+
|
| 491 |
+
|
| 492 |
+
def _optional_summary_scalar(
    state: dict[str, Tensor],
    key: str,
    *,
    reference: Tensor,
    fallback_keys: tuple[str, ...] = (),
) -> Tensor:
    """Like :func:`_summary_scalar`, but degrade gracefully when absent.

    When neither ``key`` nor any fallback is present, return a zeros tensor
    shaped like ``reference`` instead of raising.
    """
    fallback = torch.zeros_like(reference)
    try:
        value = _summary_scalar(state, key, fallback_keys)
    except KeyError:
        return fallback
    return value
|
| 503 |
+
|
| 504 |
+
|
| 505 |
+
class ElasticFeasibilityGate(nn.Module):
    """Hard feasibility gate over candidate action modes.

    Pools per-candidate rollout summaries, computes worst-case floors and
    ceilings, blocks retrieve-like candidates whose floors cross the
    configured thresholds, and emits per-candidate diagnostics plus a soft
    confidence score.
    """

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        self.config = config

    def forward(
        self,
        *,
        rollout_state: dict[str, Tensor],
        proposal_mode_names: list[list[str]],
    ) -> dict[str, Tensor | list[list[dict[str, float | bool | str]]]]:
        """Gate candidates by feasibility.

        Args:
            rollout_state: Dict of rollout summary tensors; each summary is
                reduced via ``_summary_scalar`` (assumes the last dim indexes
                rollout steps — TODO confirm against the rollout producer).
            proposal_mode_names: Per-sample list of candidate mode names.

        Returns:
            Dict with ``allowed_mask``, additive ``penalties``, the raw
            ``blocked_mask``, a soft ``adapter_confidence``, the gate floor
            tensors, and per-candidate ``reject_diagnostics`` records.
        """
        access = _summary_scalar(rollout_state, "access_summary", ("access_quality",))
        persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon"))
        support = _summary_scalar(rollout_state, "support_summary", ("support_stability",))
        reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",))
        disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",))
        # Pessimistic aggregation over the trailing dim: minimum of the "good"
        # quantities, maximum of the "bad" ones.
        access_floor = access.amin(dim=-1)
        persistence_floor = persistence.amin(dim=-1)
        support_floor = support.amin(dim=-1)
        reocclusion_worst = reocclusion.amax(dim=-1)
        disturbance_worst = disturbance.amax(dim=-1)

        # Infeasible when any floor undercuts its threshold or worst-case
        # reocclusion exceeds its ceiling.
        blocked = (
            (access_floor < self.config.retrieve_access_threshold)
            | (persistence_floor < self.config.retrieve_persistence_threshold)
            | (support_floor < self.config.retrieve_support_threshold)
            | (reocclusion_worst > self.config.retrieve_reocclusion_threshold)
        )
        # Flat additive penalty of 2.0 per blocked candidate.
        penalties = blocked.to(dtype=access.dtype) * 2.0
        allowed_mask = torch.ones_like(access_floor, dtype=torch.bool)
        reject_diagnostics: list[list[dict[str, float | bool | str]]] = []
        for batch_idx, names in enumerate(proposal_mode_names):
            sample_records: list[dict[str, float | bool | str]] = []
            for candidate_idx, name in enumerate(names[: access_floor.shape[1]]):
                # Only retrieval-style modes are hard-blocked; reveal/maintain
                # modes stay allowed regardless of the floors.
                retrieve_like = any(token in name for token in ("retrieve", "insert_actor", "probe_inside"))
                candidate_blocked = bool(retrieve_like and blocked[batch_idx, candidate_idx])
                if candidate_blocked:
                    allowed_mask[batch_idx, candidate_idx] = False
                sample_records.append(
                    {
                        "mode_name": name,
                        "retrieve_like": retrieve_like,
                        "blocked": candidate_blocked,
                        "access_floor": float(access_floor[batch_idx, candidate_idx].detach()),
                        "persistence_floor": float(persistence_floor[batch_idx, candidate_idx].detach()),
                        "support_floor": float(support_floor[batch_idx, candidate_idx].detach()),
                        "reocclusion_worst": float(reocclusion_worst[batch_idx, candidate_idx].detach()),
                        "disturbance_worst": float(disturbance_worst[batch_idx, candidate_idx].detach()),
                    }
                )
            reject_diagnostics.append(sample_records)

        # Soft confidence: fixed-weight combination of mean summaries squashed
        # through a sigmoid (higher access/persistence/support and lower
        # reocclusion/disturbance -> higher confidence).
        confidence = torch.sigmoid(
            2.0 * access.mean(dim=-1)
            + 1.5 * persistence.mean(dim=-1)
            + 1.5 * support.mean(dim=-1)
            - 1.5 * reocclusion.mean(dim=-1)
            - disturbance.mean(dim=-1)
        )
        return {
            "allowed_mask": allowed_mask,
            "penalties": penalties,
            "blocked_mask": blocked,
            "adapter_confidence": confidence,
            "gate_access_floor": access_floor,
            "gate_persistence_floor": persistence_floor,
            "gate_support_floor": support_floor,
            "gate_reocclusion_worst": reocclusion_worst,
            "reject_diagnostics": reject_diagnostics,
        }
|
| 575 |
+
|
| 576 |
+
|
| 577 |
+
class ResidualActionReranker(nn.Module):
    """Learned residual scorer over candidate action chunks.

    Concatenates each candidate's mean and terminal action with eight pooled
    rollout statistics, runs them through a small MLP, and emits a residual
    ranking score plus success-logit and risk heads.
    """

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        # Feature layout: mean action + terminal action (action_dim each)
        # plus the 8 scalar summaries concatenated in forward().
        feature_dim = (config.action_dim * 2) + 8
        self.network = nn.Sequential(
            nn.LayerNorm(feature_dim),
            nn.Linear(feature_dim, config.hidden_dim),
            nn.GELU(),
            nn.Linear(config.hidden_dim, config.hidden_dim),
            nn.GELU(),
        )
        self.score_head = nn.Linear(config.hidden_dim, 1)
        self.success_head = nn.Linear(config.hidden_dim, 1)
        self.risk_head = nn.Linear(config.hidden_dim, 1)

    def forward(
        self,
        *,
        candidate_chunks: Tensor,
        rollout_state: dict[str, Tensor],
        proposal_logits: Tensor | None,
    ) -> dict[str, Tensor]:
        """Score candidates from their actions and pooled rollout summaries.

        Args:
            candidate_chunks: Candidate action chunks; assumed shape
                (batch, candidates, horizon, action_dim) — TODO confirm.
            rollout_state: Dict of rollout summary tensors.
            proposal_logits: Optional per-candidate logits; when present and
                shape-matched, a scaled sigmoid of them is added to the score.

        Returns:
            Dict with ``residual_scores``, ``planner_success_logits`` (raw
            logits), and ``planner_risk_values`` (sigmoid, in [0, 1]).
        """
        candidate_mean = candidate_chunks.mean(dim=2)
        candidate_terminal = candidate_chunks[:, :, -1]
        visibility = _summary_scalar(rollout_state, "visibility_summary", ("visibility_gain",))
        access = _summary_scalar(rollout_state, "access_summary", ("access_quality",))
        persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon"))
        support = _summary_scalar(rollout_state, "support_summary", ("support_stability",))
        reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",))
        disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",))
        # Optional summaries fall back to zeros shaped like `visibility`.
        fold_preservation = _optional_summary_scalar(
            rollout_state,
            "fold_preservation_summary",
            reference=visibility,
            fallback_keys=("fold_preservation",),
        )
        lift_risk = _optional_summary_scalar(
            rollout_state,
            "lift_too_much_risk_summary",
            reference=visibility,
            fallback_keys=("lift_too_much_risk",),
        )
        features = torch.cat(
            [
                candidate_mean,
                candidate_terminal,
                visibility.mean(dim=-1, keepdim=True),
                access.mean(dim=-1, keepdim=True),
                persistence.mean(dim=-1, keepdim=True),
                support.mean(dim=-1, keepdim=True),
                reocclusion.mean(dim=-1, keepdim=True),
                disturbance.mean(dim=-1, keepdim=True),
                fold_preservation.mean(dim=-1, keepdim=True),
                lift_risk.mean(dim=-1, keepdim=True),
            ],
            dim=-1,
        )
        hidden = self.network(features)
        residual = self.score_head(hidden).squeeze(-1)
        success = self.success_head(hidden).squeeze(-1)
        risk = torch.sigmoid(self.risk_head(hidden).squeeze(-1))
        # Blend in the proposal head's confidence when its shape matches.
        if proposal_logits is not None and proposal_logits.shape == residual.shape:
            residual = residual + 0.25 * proposal_logits.sigmoid()
        return {
            "residual_scores": residual,
            "planner_success_logits": success,
            "planner_risk_values": risk,
        }
|
| 645 |
+
|
| 646 |
+
|
| 647 |
+
class AdapterPlanner(nn.Module):
    """Adapter-side planner combining a feasibility gate with a reranker.

    ``select_best`` scores candidate action chunks by a learned residual
    utility plus hand-tuned, stage-aware mode biases, masks out gate-blocked
    candidates, and returns the argmax candidate with diagnostics.
    """

    def __init__(self, config: PlannerConfig) -> None:
        super().__init__()
        self.config = config
        self.gate = ElasticFeasibilityGate(config)
        self.reranker = ResidualActionReranker(config)

    def select_best(
        self,
        *,
        candidate_chunks: Tensor,
        rollout_state: dict[str, Tensor],
        proposal_mode_names: list[list[str]],
        proposal_logits: Tensor | None = None,
        planning_mode: str = "adapter_active",
    ) -> dict[str, Tensor | list[list[dict[str, float | bool | str]]]]:
        """Pick the best candidate chunk per sample.

        Args:
            candidate_chunks: Assumed (batch, candidates, horizon, action_dim)
                — TODO confirm against callers.
            rollout_state: Rollout summary tensors consumed via
                ``_summary_scalar`` / ``_optional_summary_scalar``.
            proposal_mode_names: Per-sample list of candidate mode names;
                token matching on these names drives the stage machine.
            proposal_logits: Optional logits forwarded to the reranker.
            planning_mode: Passthrough modes ("identity", "trunk_only",
                "adapter_noop") short-circuit to candidate 0 with zero scores.

        Returns:
            Dict with best indices/chunk, utility tensors, planner heads,
            confidence, masks, diagnostics, and the echoed planning mode.
        """
        batch_size = candidate_chunks.shape[0]
        batch_indices = torch.arange(batch_size, device=candidate_chunks.device)
        # Passthrough modes: trivially select the first candidate.
        if planning_mode in {"identity", "trunk_only", "adapter_noop"}:
            zero_scores = candidate_chunks.new_zeros((batch_size, candidate_chunks.shape[1]))
            return {
                "best_indices": torch.zeros(batch_size, dtype=torch.long, device=candidate_chunks.device),
                "best_chunk": candidate_chunks[:, 0],
                "utility_scores": zero_scores,
                "utility_total": zero_scores,
                "planner_success_logits": zero_scores,
                "planner_risk_values": zero_scores,
                "adapter_confidence": candidate_chunks.new_ones((batch_size, candidate_chunks.shape[1])),
                "reject_diagnostics": [[] for _ in range(batch_size)],
                "planning_mode": planning_mode,
            }

        gate_outputs = self.gate(rollout_state=rollout_state, proposal_mode_names=proposal_mode_names)
        reranker = self.reranker(
            candidate_chunks=candidate_chunks,
            rollout_state=rollout_state,
            proposal_logits=proposal_logits,
        )
        # Base utility: residual score + success probability - risk.
        utility = reranker["residual_scores"] + reranker["planner_success_logits"].sigmoid() - reranker["planner_risk_values"]
        # Per-candidate scalar summaries (mean over trailing dim).
        visibility = _summary_scalar(rollout_state, "visibility_summary", ("visibility_gain",)).mean(dim=-1)
        access = _summary_scalar(rollout_state, "access_summary", ("access_quality",)).mean(dim=-1)
        persistence = _summary_scalar(rollout_state, "persistence_summary", ("persistence", "persistence_horizon")).mean(dim=-1)
        support = _summary_scalar(rollout_state, "support_summary", ("support_stability",)).mean(dim=-1)
        reocclusion = _summary_scalar(rollout_state, "reocclusion_summary", ("reocclusion_penalty",)).mean(dim=-1)
        disturbance = _summary_scalar(rollout_state, "disturbance_summary", ("disturbance_penalty",)).mean(dim=-1)
        fold_preservation = _optional_summary_scalar(
            rollout_state,
            "fold_preservation_summary",
            reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)),
            fallback_keys=("fold_preservation",),
        ).mean(dim=-1)
        mouth_aperture = _optional_summary_scalar(
            rollout_state,
            "mouth_aperture_summary",
            reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)),
            fallback_keys=("mouth_aperture",),
        ).mean(dim=-1)
        layer_separation = _optional_summary_scalar(
            rollout_state,
            "layer_separation_summary",
            reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)),
            fallback_keys=("layer_separation_quality",),
        ).mean(dim=-1)
        lift_risk = _optional_summary_scalar(
            rollout_state,
            "lift_too_much_risk_summary",
            reference=_summary_scalar(rollout_state, "access_summary", ("access_quality",)),
            fallback_keys=("lift_too_much_risk",),
        ).mean(dim=-1)
        mode_bias = utility.new_zeros(utility.shape)
        stage_penalty = utility.new_zeros(utility.shape)
        unresolved_reveal = (1.0 - visibility) + (1.0 - access)
        stabilized_reveal = 0.5 * (access + persistence + support)
        # Use optimistic scene readiness summaries for stage switching.
        # Candidate-level safety is still enforced by the retrieve gate below, so
        # we should not let one poor candidate keep the entire scene stuck in
        # "reveal forever" mode when another candidate already makes retrieve feasible.
        batch_visibility = visibility.amax(dim=1)
        batch_access = access.amax(dim=1)
        batch_persistence = persistence.amax(dim=1)
        batch_support = support.amax(dim=1)
        batch_reocclusion = reocclusion.amin(dim=1)
        batch_disturbance = disturbance.amin(dim=1)
        batch_fold = fold_preservation.amax(dim=1)
        batch_mouth = mouth_aperture.amax(dim=1)
        batch_layer = layer_separation.amax(dim=1)
        batch_lift = lift_risk.amin(dim=1)
        batch_reveal_readiness = torch.maximum(batch_visibility, batch_access)
        for batch_idx, names in enumerate(proposal_mode_names):
            # Scene-type detection by token matching on the mode names.
            is_bag = any(any(token in name for token in ("mouth", "rim", "probe_inside")) for name in names)
            is_cloth = any(any(token in name for token in ("fold", "lift", "layer")) for name in names)
            # Stage readiness: retrieve requires all thresholds to be met.
            can_retrieve = (
                batch_access[batch_idx] >= self.config.retrieve_stage_access_threshold
                and batch_reveal_readiness[batch_idx] >= self.config.retrieve_stage_reveal_threshold
                and batch_persistence[batch_idx] >= self.config.retrieve_stage_persistence_threshold
                and batch_support[batch_idx] >= self.config.retrieve_stage_support_threshold
                and batch_reocclusion[batch_idx] <= self.config.retrieve_reocclusion_threshold
            )
            # Scene-specific extra conditions tighten retrieve readiness.
            if is_bag:
                can_retrieve = bool(
                    can_retrieve
                    and batch_mouth[batch_idx] >= 0.30
                    and batch_persistence[batch_idx] >= 0.55
                )
            elif is_cloth:
                can_retrieve = bool(
                    can_retrieve
                    and batch_layer[batch_idx] >= 0.18
                    and batch_fold[batch_idx] >= 0.60
                    and batch_lift[batch_idx] <= 0.30
                    and batch_support[batch_idx] >= 0.70
                )
            can_insert = (
                batch_access[batch_idx] >= self.config.insert_stage_access_threshold
                and batch_visibility[batch_idx] >= self.config.insert_stage_visibility_threshold
                and batch_support[batch_idx] >= self.config.insert_stage_support_threshold
                and batch_reocclusion[batch_idx] <= 0.65
            )
            maintain_ready = (
                batch_access[batch_idx] >= self.config.occlusion_maintain_gap_min_access
                and batch_visibility[batch_idx] >= self.config.occlusion_maintain_gap_min_visibility
            )
            # Preferred mode tokens for the current stage; ordering only
            # matters for readability — membership is what is tested below.
            if can_retrieve:
                preferred_tokens = ("retrieve",)
            elif can_insert:
                preferred_tokens = ("probe_inside", "insert_actor") if is_bag else ("insert_actor",)
            elif is_bag:
                if batch_access[batch_idx] < 0.15 or batch_visibility[batch_idx] < 0.20:
                    preferred_tokens = ("widen_mouth", "maintain_mouth")
                else:
                    preferred_tokens = ("maintain_mouth", "widen_mouth")
            elif is_cloth:
                if batch_access[batch_idx] < 0.15 or batch_visibility[batch_idx] < 0.20:
                    preferred_tokens = ("lift_edge", "separate_layer")
                elif batch_lift[batch_idx] > 0.15 or batch_disturbance[batch_idx] > 0.25:
                    preferred_tokens = ("stabilize_fold", "maintain_lift")
                else:
                    preferred_tokens = ("maintain_lift", "stabilize_fold")
            else:
                if not maintain_ready:
                    preferred_tokens = ("widen_gap", "pin_canopy", "sweep_left", "sweep_right")
                elif batch_visibility[batch_idx] < 0.20 or batch_access[batch_idx] < 0.25:
                    preferred_tokens = ("widen_gap", "pin_canopy")
                elif batch_disturbance[batch_idx] > 0.25 or batch_reocclusion[batch_idx] > 0.40:
                    preferred_tokens = ("maintain_gap", "pin_canopy")
                else:
                    preferred_tokens = ("pin_canopy", "widen_gap")
            for candidate_idx, name in enumerate(names[: utility.shape[1]]):
                # The base action receives no bias.
                if name == "base_action":
                    continue
                # Mode-family bonus: fixed-weight blends of the summaries.
                if any(token in name for token in ("retrieve",)):
                    bonus = (
                        0.85 * visibility[batch_idx, candidate_idx]
                        + 0.85 * access[batch_idx, candidate_idx]
                        + 0.65 * persistence[batch_idx, candidate_idx]
                        + 0.50 * support[batch_idx, candidate_idx]
                        - 0.60 * reocclusion[batch_idx, candidate_idx]
                        - 0.25 * disturbance[batch_idx, candidate_idx]
                    )
                elif any(token in name for token in ("insert_actor", "probe_inside")):
                    bonus = (
                        0.70 * visibility[batch_idx, candidate_idx]
                        + 0.70 * access[batch_idx, candidate_idx]
                        + 0.35 * persistence[batch_idx, candidate_idx]
                        - 0.35 * reocclusion[batch_idx, candidate_idx]
                        - 0.15 * disturbance[batch_idx, candidate_idx]
                    )
                elif any(token in name for token in ("maintain", "stabilize", "pin_canopy")):
                    bonus = (
                        0.85 * stabilized_reveal[batch_idx, candidate_idx]
                        + 0.25 * visibility[batch_idx, candidate_idx]
                        - 0.20 * reocclusion[batch_idx, candidate_idx]
                        - 0.10 * disturbance[batch_idx, candidate_idx]
                    )
                else:
                    bonus = (
                        0.95 * unresolved_reveal[batch_idx, candidate_idx]
                        + 0.20 * (1.0 - persistence[batch_idx, candidate_idx])
                        - 0.10 * disturbance[batch_idx, candidate_idx]
                    )
                # Cloth-specific adjustment applies on top of any family bonus.
                if any(token in name for token in ("fold", "lift", "layer")):
                    bonus = bonus + 0.35 * fold_preservation[batch_idx, candidate_idx] - 0.35 * lift_risk[batch_idx, candidate_idx]
                # Stage preference / premature-action penalties. NOTE: this
                # elif chain is order-sensitive — a preferred candidate is
                # never penalised even if later clauses would match.
                if any(token in name for token in preferred_tokens):
                    bonus = bonus + self.config.mode_preference_bonus
                elif "retrieve" in name and not can_retrieve:
                    bonus = bonus - self.config.premature_retrieve_penalty
                    stage_penalty[batch_idx, candidate_idx] = (
                        stage_penalty[batch_idx, candidate_idx] + self.config.premature_retrieve_penalty
                    )
                elif is_cloth and any(token in name for token in ("stabilize", "maintain")) and any(
                    token in preferred_tokens for token in ("lift_edge", "separate_layer")
                ):
                    bonus = bonus - 1.0
                    stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.0
                elif (not is_bag and not is_cloth) and any(token in name for token in ("sweep_left", "sweep_right")) and any(
                    token in preferred_tokens for token in ("pin_canopy", "widen_gap", "maintain_gap")
                ):
                    bonus = bonus - self.config.premature_occlusion_sweep_penalty
                elif any(token in name for token in ("probe_inside", "insert_actor", "retrieve")) and not can_insert:
                    bonus = bonus - self.config.premature_insert_penalty
                    stage_penalty[batch_idx, candidate_idx] = (
                        stage_penalty[batch_idx, candidate_idx] + self.config.premature_insert_penalty
                    )
                # Additional penalties stacked outside the elif chain.
                if (
                    (not is_bag and not is_cloth)
                    and "maintain_gap" in name
                    and not maintain_ready
                    and self.config.premature_maintain_penalty > 0.0
                ):
                    bonus = bonus - self.config.premature_maintain_penalty
                    stage_penalty[batch_idx, candidate_idx] = (
                        stage_penalty[batch_idx, candidate_idx] + self.config.premature_maintain_penalty
                    )
                if is_bag and (batch_mouth[batch_idx] < 0.18 or batch_access[batch_idx] < 0.15) and "widen_mouth" in name:
                    stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.5
                if is_cloth and (batch_layer[batch_idx] < 0.12 or batch_visibility[batch_idx] < 0.05) and any(
                    token in name for token in ("lift_edge", "separate_layer")
                ):
                    stage_penalty[batch_idx, candidate_idx] = stage_penalty[batch_idx, candidate_idx] + 1.5
                mode_bias[batch_idx, candidate_idx] = bonus
        utility = utility + mode_bias
        utility = utility + 0.5 * fold_preservation - 0.5 * lift_risk
        utility = utility - stage_penalty
        utility = utility - gate_outputs["penalties"]
        allowed_mask = gate_outputs["allowed_mask"]
        assert isinstance(allowed_mask, Tensor)
        # Blocked candidates are pushed to -1e6 so argmax cannot pick them.
        utility = utility.masked_fill(~allowed_mask, -1e6)
        best_indices = utility.argmax(dim=-1)
        best_chunk = candidate_chunks[batch_indices, best_indices]
        return {
            "best_indices": best_indices,
            "best_chunk": best_chunk,
            "utility_scores": utility,
            "utility_total": utility,
            "planner_success_logits": reranker["planner_success_logits"],
            "planner_risk_values": reranker["planner_risk_values"],
            "adapter_confidence": gate_outputs["adapter_confidence"],
            "allowed_mask": gate_outputs["allowed_mask"],
            "reject_diagnostics": gate_outputs["reject_diagnostics"],
            "planning_mode": planning_mode,
        }
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/losses.py
ADDED
|
@@ -0,0 +1,855 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
from torch import Tensor
|
| 8 |
+
|
| 9 |
+
from models.reveal_head import TASK_METRIC_NAMES, task_metric_valid_mask
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass
class LossWeights:
    """Scalar weights for every term of the composite training loss.

    A weight of 0.0 disables the corresponding term (several terms default
    to 0.0 and must be explicitly enabled).
    """

    # Core imitation / supervision terms.
    action: float = 1.0
    phase: float = 0.05
    arm_role: float = 0.2
    support_mode: float = 0.1
    corridor: float = 0.1
    persistence: float = 0.05
    disturbance: float = 0.05
    # Model-based auxiliary heads.
    world_model: float = 0.1
    belief: float = 0.05
    visibility: float = 0.05
    clearance: float = 0.05
    support_stability: float = 0.05
    reocclusion: float = 0.05
    occluder_contact: float = 0.05
    grasp_affordance: float = 0.05
    # Planner supervision (success / risk / ranking of candidate chunks).
    planner_success: float = 0.1
    planner_risk: float = 0.05
    planner_ranking: float = 0.05
    # Proposal-head supervision.
    proposal_reconstruction: float = 0.1
    proposal_success: float = 0.05
    proposal_ranking: float = 0.05
    proposal_mode: float = 0.05
    # Presumably restricts the proposal-mode loss to cloth-style tasks when
    # True — confirm against the trainer's usage.
    proposal_mode_cloth_only: bool = False
    # Optional task-name filter for the proposal-mode loss; None = no filter.
    proposal_mode_task_filter: list[str] | None = None
    proposal_diversity: float = 0.05
    role_swap_consistency: float = 0.05
    task_metrics: float = 0.05
    # Disabled by default; set a positive weight to enable.
    transition: float = 0.0
    gate: float = 0.0
    distillation: float = 0.0
    calibration: float = 0.0
| 46 |
+
|
| 47 |
+
def chunk_bc_loss(pred_actions: Tensor, target_actions: Tensor, mask: Tensor | None = None) -> Tensor:
|
| 48 |
+
loss = F.smooth_l1_loss(pred_actions, target_actions, reduction="none")
|
| 49 |
+
if mask is not None:
|
| 50 |
+
loss = loss * mask.unsqueeze(-1)
|
| 51 |
+
return loss.sum() / mask.sum().clamp_min(1.0)
|
| 52 |
+
return loss.mean()
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _command_probability(command: Tensor) -> Tensor:
|
| 56 |
+
return (torch.tanh(command) + 1.0) * 0.5
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def infer_phase_targets_from_actions(action_chunk: Tensor) -> Tensor:
    """Derive per-step phase labels from raw action commands.

    Labels: 0 = default, 1 = reveal, 2 = hold, 3 = retrieve, 4 = recover.
    Later assignments win, so recover overrides retrieve, which overrides
    hold, which overrides reveal.

    Args:
        action_chunk: Action tensor whose last dim carries the command
            channels (reads indices 0, 8, and 13).
    """
    open_cmd = action_chunk[..., 0]
    # Inline command->probability mapping: (tanh(x) + 1) / 2.
    actor_reach = (torch.tanh(action_chunk[..., 8]) + 1.0) * 0.5
    retrieve_cmd = (torch.tanh(action_chunk[..., 13]) + 1.0) * 0.5

    retrieve = retrieve_cmd >= 0.55
    recover = open_cmd <= -0.10
    reveal = open_cmd > 0.35
    hold = (~retrieve) & (~recover) & (~reveal) & (actor_reach >= 0.55)

    phase = torch.zeros_like(open_cmd, dtype=torch.long)
    phase = torch.where(reveal, torch.ones_like(phase), phase)
    phase = torch.where(hold, torch.full_like(phase, 2), phase)
    phase = torch.where(retrieve, torch.full_like(phase, 3), phase)
    phase = torch.where(recover, torch.full_like(phase, 4), phase)
    return phase
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _role_targets_like(arm_role_logits: Tensor) -> Tensor:
|
| 78 |
+
role_target = torch.as_tensor([1, 2], device=arm_role_logits.device, dtype=torch.long)
|
| 79 |
+
expand_shape = [1] * (arm_role_logits.ndim - 2) + [2]
|
| 80 |
+
return role_target.view(*expand_shape).expand(*arm_role_logits.shape[:-1])
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def swap_arm_actions(action_chunk: Tensor) -> Tensor:
    """Exchange the two arms' halves of the last (action) dimension."""
    half = action_chunk.shape[-1] // 2
    first_arm = action_chunk[..., :half]
    second_arm = action_chunk[..., half:]
    return torch.cat((second_arm, first_arm), dim=-1)
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def permutation_invariant_role_loss(arm_role_logits: Tensor) -> Tensor:
    """Cross-entropy on arm roles, invariant to swapping the two arms.

    Scores the logits against both the canonical target assignment and its
    arm-swapped counterpart, keeping the cheaper of the two per sample.
    """
    targets = _role_targets_like(arm_role_logits)
    flipped = targets.flip(-1)
    logits_2d = arm_role_logits.reshape(-1, arm_role_logits.shape[-1])

    def _per_arm(labels: Tensor) -> Tensor:
        # Per-element CE, reshaped back so we can sum over the arm dim.
        flat = F.cross_entropy(logits_2d, labels.reshape(-1), reduction="none")
        return flat.view(*labels.shape)

    straight = _per_arm(targets).sum(dim=-1)
    swapped = _per_arm(flipped).sum(dim=-1)
    return torch.minimum(straight, swapped).mean()
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
def role_swap_consistency_loss(pred: Tensor, target: Tensor) -> Tensor:
    """Mean smooth-L1 penalty between a role-swapped prediction and its reference."""
    consistency = F.smooth_l1_loss(pred, target, reduction="mean")
    return consistency
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
def proposal_diversity_loss(proposal_candidates: Tensor, minimum_distance: float = 0.05) -> Tensor:
    """Hinge penalty pushing candidate chunks at least ``minimum_distance`` apart (L1).

    Returns zero for non-4D input or when there is at most one candidate.
    """
    if proposal_candidates.ndim != 4 or proposal_candidates.shape[1] <= 1:
        return proposal_candidates.new_tensor(0.0)
    flat = proposal_candidates.flatten(start_dim=2)
    pairwise = torch.cdist(flat, flat, p=1)
    num_candidates = pairwise.shape[-1]
    diagonal = torch.eye(num_candidates, device=pairwise.device, dtype=torch.bool).unsqueeze(0)
    off_diagonal = (~diagonal).expand(pairwise.shape[0], -1, -1)
    if not off_diagonal.any():
        return proposal_candidates.new_tensor(0.0)
    shortfall = minimum_distance - pairwise[off_diagonal]
    return torch.relu(shortfall).mean()
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def proposal_set_reconstruction_loss(proposal_candidates: Tensor, target_candidates: Tensor) -> Tensor:
    """Symmetric Chamfer-style L1 matching loss between predicted and target candidate sets.

    Both inputs must be rank-4 (batch, candidates, horizon, dim); otherwise, or
    when either set is empty, the loss is zero. Distances are normalized by the
    flattened per-candidate feature size.
    """
    zero = proposal_candidates.new_tensor(0.0)
    if proposal_candidates.ndim != 4 or target_candidates.ndim != 4:
        return zero
    if proposal_candidates.shape[1] == 0 or target_candidates.shape[1] == 0:
        return zero
    predicted = proposal_candidates.flatten(start_dim=2)
    reference = target_candidates.flatten(start_dim=2).to(dtype=predicted.dtype)
    scale = float(max(1, predicted.shape[-1]))
    pairwise = torch.cdist(predicted, reference, p=1) / scale
    forward = pairwise.min(dim=-1).values.mean()
    backward = pairwise.min(dim=-2).values.mean()
    return 0.5 * (forward + backward)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def _proposal_target_batch(batch: dict[str, Tensor]) -> tuple[Tensor | None, Tensor | None, Tensor | None, Tensor | None]:
|
| 125 |
+
proposal_chunks = batch.get("proposal_target_action_chunks")
|
| 126 |
+
if proposal_chunks is None:
|
| 127 |
+
proposal_chunks = batch.get("candidate_action_chunks")
|
| 128 |
+
proposal_success = batch.get("proposal_target_retrieval_success")
|
| 129 |
+
if proposal_success is None:
|
| 130 |
+
proposal_success = batch.get("candidate_retrieval_success")
|
| 131 |
+
proposal_risk = batch.get("proposal_target_risk")
|
| 132 |
+
if proposal_risk is None:
|
| 133 |
+
proposal_risk = batch.get("candidate_risk")
|
| 134 |
+
proposal_utility = batch.get("proposal_target_utility")
|
| 135 |
+
if proposal_utility is None:
|
| 136 |
+
proposal_utility = batch.get("candidate_utility")
|
| 137 |
+
return proposal_chunks, proposal_success, proposal_risk, proposal_utility
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def _proposal_mode_targets(
|
| 141 |
+
proposal_mode_assignments: Tensor,
|
| 142 |
+
proposal_success: Tensor,
|
| 143 |
+
proposal_utility: Tensor,
|
| 144 |
+
num_modes: int,
|
| 145 |
+
) -> tuple[Tensor, Tensor]:
|
| 146 |
+
batch_size, candidate_count = proposal_success.shape
|
| 147 |
+
mode_assignments = proposal_mode_assignments.view(-1)[:candidate_count].long().to(device=proposal_success.device)
|
| 148 |
+
mode_success = torch.zeros(batch_size, num_modes, dtype=proposal_success.dtype, device=proposal_success.device)
|
| 149 |
+
mode_utility = torch.full(
|
| 150 |
+
(batch_size, num_modes),
|
| 151 |
+
fill_value=-1e6,
|
| 152 |
+
dtype=proposal_utility.dtype,
|
| 153 |
+
device=proposal_utility.device,
|
| 154 |
+
)
|
| 155 |
+
valid_assignment_mask = mode_assignments >= 0
|
| 156 |
+
for mode_idx in range(num_modes):
|
| 157 |
+
mask = mode_assignments == mode_idx
|
| 158 |
+
if not torch.any(mask):
|
| 159 |
+
continue
|
| 160 |
+
mode_success[:, mode_idx] = proposal_success[:, mask].amax(dim=1)
|
| 161 |
+
mode_utility[:, mode_idx] = proposal_utility[:, mask].amax(dim=1)
|
| 162 |
+
no_mode = torch.logical_or(
|
| 163 |
+
~valid_assignment_mask.any(),
|
| 164 |
+
torch.isclose(mode_success.sum(dim=1), mode_success.new_zeros(batch_size)),
|
| 165 |
+
)
|
| 166 |
+
if torch.any(no_mode):
|
| 167 |
+
mode_utility[no_mode] = 0.0
|
| 168 |
+
return mode_success, mode_utility
|
| 169 |
+
|
| 170 |
+
|
| 171 |
+
def _proposal_reconstruction_targets(
|
| 172 |
+
batch: dict[str, Tensor],
|
| 173 |
+
proposal_count: int,
|
| 174 |
+
fallback_targets: Tensor | None,
|
| 175 |
+
) -> Tensor | None:
|
| 176 |
+
task_name = batch.get("task_name")
|
| 177 |
+
if isinstance(task_name, str) and task_name == "bag" and fallback_targets is not None:
|
| 178 |
+
return fallback_targets
|
| 179 |
+
teacher_candidates = batch.get("candidate_action_chunks")
|
| 180 |
+
teacher_utility = batch.get("candidate_utility")
|
| 181 |
+
if teacher_candidates is None:
|
| 182 |
+
return fallback_targets
|
| 183 |
+
if teacher_utility is None or teacher_candidates.shape[1] <= 1:
|
| 184 |
+
return teacher_candidates
|
| 185 |
+
top_k = min(teacher_candidates.shape[1], max(1, proposal_count // 2))
|
| 186 |
+
top_indices = teacher_utility.topk(k=top_k, dim=1).indices
|
| 187 |
+
gather_index = top_indices[..., None, None].expand(
|
| 188 |
+
-1,
|
| 189 |
+
-1,
|
| 190 |
+
teacher_candidates.shape[2],
|
| 191 |
+
teacher_candidates.shape[3],
|
| 192 |
+
)
|
| 193 |
+
return teacher_candidates.gather(1, gather_index)
|
| 194 |
+
|
| 195 |
+
|
| 196 |
+
def _task_name_mask(batch: dict[str, Tensor | list[str] | tuple[str, ...] | str], task_name: str, batch_size: int, device: torch.device) -> Tensor | None:
    """Boolean per-sample mask marking batch entries whose task name equals ``task_name``.

    Thin wrapper over the multi-name variant.
    """
    return _task_name_mask_for_values(batch, (task_name,), batch_size=batch_size, device=device)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def _task_name_mask_for_values(
|
| 201 |
+
batch: dict[str, Tensor | list[str] | tuple[str, ...] | str],
|
| 202 |
+
task_names: list[str] | tuple[str, ...],
|
| 203 |
+
batch_size: int,
|
| 204 |
+
device: torch.device,
|
| 205 |
+
) -> Tensor | None:
|
| 206 |
+
target_names = {str(name) for name in task_names}
|
| 207 |
+
task_names = batch.get("task_name")
|
| 208 |
+
if isinstance(task_names, str):
|
| 209 |
+
return torch.full((batch_size,), task_names in target_names, dtype=torch.bool, device=device)
|
| 210 |
+
if isinstance(task_names, (list, tuple)):
|
| 211 |
+
if len(task_names) < batch_size:
|
| 212 |
+
return None
|
| 213 |
+
values = [str(task_names[idx]) in target_names for idx in range(batch_size)]
|
| 214 |
+
return torch.as_tensor(values, dtype=torch.bool, device=device)
|
| 215 |
+
return None
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
def _resize_like(target: Tensor, prediction: Tensor) -> Tensor:
|
| 219 |
+
if target.shape == prediction.shape:
|
| 220 |
+
return target
|
| 221 |
+
if target.ndim == prediction.ndim == 4:
|
| 222 |
+
return F.interpolate(target.float(), size=prediction.shape[-2:], mode="bilinear", align_corners=False)
|
| 223 |
+
if target.ndim == 3 and prediction.ndim == 4:
|
| 224 |
+
return F.interpolate(target.unsqueeze(1).float(), size=prediction.shape[-2:], mode="bilinear", align_corners=False)
|
| 225 |
+
return target
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
def reveal_state_loss(pred: dict[str, Tensor], target: dict[str, Tensor], weights: LossWeights) -> dict[str, Tensor]:
    """Per-head supervision losses for the reveal/interaction state predictor.

    Args:
        pred: head outputs; ``support_mode_logits``, ``corridor_logits``,
            ``persistence_horizon`` and ``disturbance_cost`` are required, all
            other heads are optional and fall back to a zero loss when absent.
        target: supervision tensors keyed to match ``pred`` (plus fallbacks
            such as ``action_chunk`` / ``support_mode`` used to derive phase
            targets, and ``task_metric_mask`` / ``task_name`` for task metrics).
        weights: loss weights. NOTE(review): unused inside this function —
            weighting is applied by the caller; kept for interface stability.

    Returns:
        Dict of unweighted scalar losses; every key is always present (zero
        tensors stand in for heads that were not predicted or supervised).
    """
    losses: dict[str, Tensor] = {}
    # --- phase head: prefer explicit targets, else derive from actions or support mode ---
    if "phase_logits" in pred:
        if "phase" in target:
            phase_target = target["phase"].long()
        else:
            action_chunk = target.get("action_chunk")
            if action_chunk is not None:
                # Derive phase labels from the first step of each action chunk.
                phase_target = infer_phase_targets_from_actions(action_chunk[:, 0])
            else:
                # Fall back to a fixed support-mode -> phase-id lookup.
                # NOTE(review): [2, 3, 0] assumes exactly 3 support modes — confirm.
                phase_map = torch.as_tensor([2, 3, 0], device=target["support_mode"].device, dtype=torch.long)
                phase_target = phase_map[target["support_mode"].long()]
        losses["phase"] = F.cross_entropy(pred["phase_logits"], phase_target)
    else:
        losses["phase"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- arm-role head: permutation-invariant CE plus a margin that keeps the
    # two arms' role distributions at least 0.25 apart (mean abs prob gap) ---
    if "arm_role_logits" in pred:
        role_ce = permutation_invariant_role_loss(pred["arm_role_logits"])
        role_probs = pred["arm_role_logits"].softmax(dim=-1)
        # NOTE(review): indexing [:, 0] / [:, 1] assumes the arm axis is dim 1 — confirm layout.
        role_gap = torch.mean(torch.abs(role_probs[:, 0] - role_probs[:, 1]), dim=-1)
        role_separation = torch.relu(0.25 - role_gap).mean()
        losses["arm_role"] = role_ce + 0.5 * role_separation
    else:
        losses["arm_role"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- mandatory heads ---
    support_target = target["support_mode"].long()
    losses["support_mode"] = F.cross_entropy(pred["support_mode_logits"], support_target)
    losses["corridor"] = F.binary_cross_entropy_with_logits(
        pred["corridor_logits"],
        target["corridor_feasible"].float(),
    )
    losses["persistence"] = F.mse_loss(pred["persistence_horizon"], target["persistence_horizon"].float())
    losses["disturbance"] = F.mse_loss(pred["disturbance_cost"], target["disturbance_cost"].float())
    # --- optional spatial map heads: BCE against targets resized to the predicted map size ---
    if "belief_map" in pred and "belief_map" in target:
        losses["belief"] = F.binary_cross_entropy_with_logits(pred["belief_map"], _resize_like(target["belief_map"].float(), pred["belief_map"]))
    else:
        losses["belief"] = pred["support_mode_logits"].new_tensor(0.0)
    if "visibility_field" in pred and "visibility_map" in target:
        losses["visibility"] = F.binary_cross_entropy_with_logits(
            pred["visibility_field"],
            _resize_like(target["visibility_map"].float(), pred["visibility_field"]),
        )
    else:
        losses["visibility"] = pred["support_mode_logits"].new_tensor(0.0)
    if "clearance_field" in pred and "clearance_map" in target:
        losses["clearance"] = F.binary_cross_entropy_with_logits(
            pred["clearance_field"],
            _resize_like(target["clearance_map"].float(), pred["clearance_field"]),
        )
    else:
        losses["clearance"] = pred["support_mode_logits"].new_tensor(0.0)
    if "support_stability_field" in pred and "support_stability_map" in target:
        losses["support_stability"] = F.binary_cross_entropy_with_logits(
            pred["support_stability_field"],
            _resize_like(target["support_stability_map"].float(), pred["support_stability_field"]),
        )
    else:
        losses["support_stability"] = pred["support_mode_logits"].new_tensor(0.0)
    if "occluder_contact_field" in pred and "occluder_contact_map" in target:
        losses["occluder_contact"] = F.binary_cross_entropy_with_logits(
            pred["occluder_contact_field"],
            _resize_like(target["occluder_contact_map"].float(), pred["occluder_contact_field"]),
        )
    else:
        losses["occluder_contact"] = pred["support_mode_logits"].new_tensor(0.0)
    if "grasp_affordance_field" in pred and "grasp_affordance_map" in target:
        losses["grasp_affordance"] = F.binary_cross_entropy_with_logits(
            pred["grasp_affordance_field"],
            _resize_like(target["grasp_affordance_map"].float(), pred["grasp_affordance_field"]),
        )
    else:
        losses["grasp_affordance"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- re-occlusion head: explicit target if given, else "no corridor is
    # feasible" (1 - max feasibility over the last axis) as a proxy ---
    if "reocclusion_logit" in pred and "corridor_feasible" in target:
        target_reocclusion = target.get("reocclusion_target")
        if target_reocclusion is None:
            target_reocclusion = 1.0 - target["corridor_feasible"].float().amax(dim=-1)
        if target_reocclusion.ndim < pred["reocclusion_logit"].ndim:
            target_reocclusion = target_reocclusion.unsqueeze(-1).expand_as(pred["reocclusion_logit"])
        losses["reocclusion"] = F.binary_cross_entropy_with_logits(pred["reocclusion_logit"], target_reocclusion)
    else:
        losses["reocclusion"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- uncertainty regularizer: direct mean of the predicted uncertainty ---
    if "persistence_uncertainty" in pred:
        losses["uncertainty"] = pred["persistence_uncertainty"].mean()
    else:
        losses["uncertainty"] = pred["support_mode_logits"].new_tensor(0.0)
    # --- per-task scalar metric heads, masked by task validity when available ---
    task_metric_pairs = tuple(TASK_METRIC_NAMES)  # module-level list of metric key names
    metric_mask = target.get("task_metric_mask")
    if metric_mask is None:
        # Derive a (batch, num_metrics) validity mask from the task names,
        # broadcasting a single string across the batch.
        target_task_names = target.get("task_name")
        if isinstance(target_task_names, (list, tuple, str)):
            metric_mask = task_metric_valid_mask(
                [str(name) for name in target_task_names] if not isinstance(target_task_names, str) else [target_task_names] * pred["support_mode_logits"].shape[0],
                device=pred["support_mode_logits"].device,
                batch_size=pred["support_mode_logits"].shape[0],
            )
    task_losses = []
    for metric_idx, key in enumerate(task_metric_pairs):
        if key not in pred or key not in target:
            continue
        if metric_mask is None:
            # No mask available: plain MSE over all samples.
            task_losses.append(F.mse_loss(pred[key].float(), target[key].float()))
            continue
        per_sample = F.mse_loss(pred[key].float(), target[key].float(), reduction="none")
        # Reduce any trailing dims so each sample contributes one scalar.
        while per_sample.ndim > 1:
            per_sample = per_sample.mean(dim=-1)
        valid = metric_mask[:, metric_idx].to(dtype=per_sample.dtype)
        if valid.sum() <= 0:
            continue  # metric not valid for any sample in the batch
        task_losses.append((per_sample * valid).sum() / valid.sum().clamp_min(1.0))
    losses["task_metrics"] = (
        torch.stack(task_losses).mean()
        if task_losses
        else pred["support_mode_logits"].new_tensor(0.0)
    )
    # --- confidence calibration head ---
    if "state_confidence_logit" in pred and "state_confidence_target" in target:
        losses["calibration"] = F.binary_cross_entropy_with_logits(
            pred["state_confidence_logit"],
            target["state_confidence_target"].float(),
        )
    else:
        losses["calibration"] = pred["support_mode_logits"].new_tensor(0.0)
    return losses
|
| 348 |
+
|
| 349 |
+
|
| 350 |
+
def world_model_rollout_consistency_loss(pred_rollout: dict[str, Tensor], target_rollout: dict[str, Tensor]) -> Tensor:
    """Consistency loss between a predicted world-model rollout and its target.

    Core terms: support-mode cross-entropy, corridor BCE, persistence and
    disturbance MSE, all truncated to the shared rollout horizon. Optional
    terms (phase CE, permutation-invariant arm-role CE, and the map heads in
    ``optional_pairs``) are added only when both sides provide them.

    Args:
        pred_rollout: predicted rollout heads; ``support_mode_logits`` with
            ndim == 4 is treated as per-candidate (batch, candidates, time, classes)
            and per-sample targets are broadcast across the candidate axis.
        target_rollout: target tensors keyed like the batch ``rollout_*``
            entries (core keys without the prefix, optional map keys with it).

    Returns:
        Scalar loss tensor.
    """
    has_candidates = pred_rollout["support_mode_logits"].ndim == 4
    candidate_dim = pred_rollout["support_mode_logits"].shape[1] if has_candidates else 1

    def _expand_target(value: Tensor) -> Tensor:
        # Broadcast a per-sample target across the candidate dimension when the
        # prediction carries one and the target does not already match it.
        if not has_candidates:
            return value
        if value.ndim >= 2 and value.shape[1] == candidate_dim:
            return value
        return value.unsqueeze(1).expand(-1, candidate_dim, *value.shape[1:])

    def _resize_rollout_target_like(target_value: Tensor, pred_value: Tensor) -> Tensor:
        # Bilinearly resize the trailing spatial dims of a rollout target to the
        # prediction's, flattening leading (batch/candidate/time) dims first.
        if target_value.shape == pred_value.shape:
            return target_value
        if pred_value.ndim == 6:
            flat_target = target_value.reshape(-1, target_value.shape[-3], target_value.shape[-2], target_value.shape[-1])
            flat_pred = pred_value.reshape(-1, pred_value.shape[-3], pred_value.shape[-2], pred_value.shape[-1])
            resized = _resize_like(flat_target.float(), flat_pred)
            return resized.reshape(*pred_value.shape[:-3], pred_value.shape[-3], pred_value.shape[-2], pred_value.shape[-1])
        if pred_value.ndim == 5:
            flat_target = target_value.reshape(-1, target_value.shape[-2], target_value.shape[-1])
            if flat_target.shape[-2:] != pred_value.shape[-2:]:
                flat_target = F.interpolate(
                    flat_target.unsqueeze(1).float(),
                    size=pred_value.shape[-2:],
                    mode="bilinear",
                    align_corners=False,
                ).squeeze(1)
            return flat_target.reshape(*pred_value.shape[:-2], pred_value.shape[-2], pred_value.shape[-1])
        return target_value

    horizon = min(
        pred_rollout["support_mode_logits"].shape[-2],
        target_rollout["support_mode"].shape[-1],
    )
    # BUGFIX: the original rebound `pred_rollout`/`target_rollout` to dicts
    # containing ONLY the core keys, so every later membership check
    # (`"phase_logits" in pred_rollout`, `"phase" in target_rollout`, and all
    # `optional_pairs` keys) was dead and the optional supervision terms were
    # silently dropped. Keep the raw dicts and carry the extra keys over.
    raw_pred, raw_target = pred_rollout, target_rollout
    pred_rollout = {
        "support_mode_logits": raw_pred["support_mode_logits"][..., :horizon, :],
        "corridor_logits": raw_pred["corridor_logits"][..., :horizon, :, :],
        "persistence_horizon": raw_pred["persistence_horizon"][..., :horizon, :],
        "disturbance_cost": raw_pred["disturbance_cost"][..., :horizon],
    }
    for key, value in raw_pred.items():
        pred_rollout.setdefault(key, value)
    target_rollout = {
        "support_mode": _expand_target(raw_target["support_mode"][..., :horizon]),
        "corridor_feasible": _expand_target(raw_target["corridor_feasible"][..., :horizon, :, :]),
        "persistence_horizon": _expand_target(raw_target["persistence_horizon"][..., :horizon, :]),
        "disturbance_cost": _expand_target(raw_target["disturbance_cost"][..., :horizon]),
        "action_chunk": _expand_target(raw_target["action_chunk"][..., :horizon, :]),
    }
    for key, value in raw_target.items():
        target_rollout.setdefault(key, value)
    if "phase" in target_rollout:
        target_rollout["phase"] = _expand_target(raw_target["phase"][..., :horizon])
    corridor_target = _resize_rollout_target_like(
        target_rollout["corridor_feasible"],
        pred_rollout["corridor_logits"],
    )
    loss = (
        F.cross_entropy(
            pred_rollout["support_mode_logits"].reshape(-1, pred_rollout["support_mode_logits"].shape[-1]),
            target_rollout["support_mode"].reshape(-1).long(),
        )
        + F.binary_cross_entropy_with_logits(
            pred_rollout["corridor_logits"],
            corridor_target.float(),
        )
        + F.mse_loss(pred_rollout["persistence_horizon"], target_rollout["persistence_horizon"].float())
        + F.mse_loss(pred_rollout["disturbance_cost"], target_rollout["disturbance_cost"].float())
    )
    if "phase_logits" in pred_rollout:
        phase_target = target_rollout.get("phase")
        if phase_target is None:
            phase_target = infer_phase_targets_from_actions(target_rollout["action_chunk"])
        # Slice the logits to the shared horizon so they align with the target.
        # NOTE(review): assumes phase_logits is (..., time, classes) — confirm.
        phase_logits = pred_rollout["phase_logits"][..., :horizon, :]
        loss = loss + 0.5 * F.cross_entropy(
            phase_logits.reshape(-1, phase_logits.shape[-1]),
            phase_target.reshape(-1),
        )
    if "arm_role_logits" in pred_rollout:
        loss = loss + 0.25 * permutation_invariant_role_loss(pred_rollout["arm_role_logits"])
    optional_pairs = (
        ("target_belief_field", "belief_map", "rollout_belief_map"),
        ("visibility_field", "visibility_map", "rollout_visibility_map"),
        ("clearance_field", "clearance_map", "rollout_clearance_map"),
        ("support_stability_field", "support_stability_map", "rollout_support_stability"),
        ("occluder_contact_field", "occluder_contact_map", "rollout_occluder_contact_map"),
        ("grasp_affordance_field", "grasp_affordance_map", "rollout_grasp_affordance_map"),
        ("reocclusion_field", "reocclusion_map", "rollout_reocclusion_target"),
    )
    for pred_key, _, target_key in optional_pairs:
        if pred_key not in pred_rollout or target_key not in target_rollout:
            continue
        raw_value = target_rollout[target_key]
        # BUGFIX: the original indexed with two ellipses (`[..., :horizon, ...]`),
        # which raises IndexError in torch. Slice the time axis by rank instead.
        # NOTE(review): assumes map targets end in (..., time, H, W), rank-3
        # targets in (..., time, D), and rank<=2 in (..., time) — confirm
        # against the dataloader's rollout_* shapes.
        if raw_value.ndim >= 4:
            target_value = raw_value[..., :horizon, :, :]
        elif raw_value.ndim == 3:
            target_value = raw_value[..., :horizon, :]
        else:
            target_value = raw_value[..., :horizon]
        target_value = _expand_target(target_value)
        pred_value = pred_rollout[pred_key][..., :horizon, :, :, :] if pred_rollout[pred_key].ndim >= 6 else pred_rollout[pred_key][..., :horizon, :, :]
        while target_value.ndim < pred_value.ndim:
            target_value = target_value.unsqueeze(-1)
        if pred_value.ndim >= 5:
            target_value = _resize_rollout_target_like(target_value, pred_value)
        loss = loss + 0.1 * F.binary_cross_entropy_with_logits(pred_value, target_value.float())
    return loss
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
def compute_total_loss(
|
| 449 |
+
model_output: dict[str, Tensor],
|
| 450 |
+
batch: dict[str, Tensor],
|
| 451 |
+
weights: LossWeights | None = None,
|
| 452 |
+
) -> dict[str, Tensor]:
|
| 453 |
+
weights = weights or LossWeights()
|
| 454 |
+
losses = {
|
| 455 |
+
"action": chunk_bc_loss(
|
| 456 |
+
model_output["action_mean"],
|
| 457 |
+
batch["action_chunk"],
|
| 458 |
+
mask=batch.get("action_mask"),
|
| 459 |
+
),
|
| 460 |
+
}
|
| 461 |
+
total = weights.action * losses["action"]
|
| 462 |
+
|
| 463 |
+
state_output = model_output.get("interaction_state")
|
| 464 |
+
if state_output is None:
|
| 465 |
+
state_output = model_output.get("reveal_state")
|
| 466 |
+
|
| 467 |
+
if state_output is not None and "support_mode" in batch:
|
| 468 |
+
reveal_losses = reveal_state_loss(state_output, batch, weights)
|
| 469 |
+
losses.update(reveal_losses)
|
| 470 |
+
total = (
|
| 471 |
+
total
|
| 472 |
+
+ weights.phase * reveal_losses["phase"]
|
| 473 |
+
+ weights.arm_role * reveal_losses["arm_role"]
|
| 474 |
+
+ weights.support_mode * reveal_losses["support_mode"]
|
| 475 |
+
+ weights.corridor * reveal_losses["corridor"]
|
| 476 |
+
+ weights.persistence * reveal_losses["persistence"]
|
| 477 |
+
+ weights.disturbance * reveal_losses["disturbance"]
|
| 478 |
+
+ weights.belief * reveal_losses["belief"]
|
| 479 |
+
+ weights.visibility * reveal_losses["visibility"]
|
| 480 |
+
+ weights.clearance * reveal_losses["clearance"]
|
| 481 |
+
+ weights.support_stability * reveal_losses["support_stability"]
|
| 482 |
+
+ weights.occluder_contact * reveal_losses["occluder_contact"]
|
| 483 |
+
+ weights.grasp_affordance * reveal_losses["grasp_affordance"]
|
| 484 |
+
+ weights.reocclusion * reveal_losses["reocclusion"]
|
| 485 |
+
+ weights.task_metrics * reveal_losses["task_metrics"]
|
| 486 |
+
+ weights.calibration * reveal_losses["calibration"]
|
| 487 |
+
+ 0.01 * reveal_losses["uncertainty"]
|
| 488 |
+
)
|
| 489 |
+
|
| 490 |
+
if model_output.get("planned_rollout") and model_output.get("rollout_source", "learned") in {"learned", "lightweight"} and (
|
| 491 |
+
"proposal_target_rollout_support_mode" in batch
|
| 492 |
+
or "candidate_rollout_support_mode" in batch
|
| 493 |
+
or "rollout_support_mode" in batch
|
| 494 |
+
):
|
| 495 |
+
if "proposal_target_rollout_support_mode" in batch:
|
| 496 |
+
rollout_target = {
|
| 497 |
+
"support_mode": batch["proposal_target_rollout_support_mode"],
|
| 498 |
+
"corridor_feasible": batch["proposal_target_rollout_corridor_feasible"],
|
| 499 |
+
"persistence_horizon": batch["proposal_target_rollout_persistence_horizon"],
|
| 500 |
+
"disturbance_cost": batch["proposal_target_rollout_disturbance_cost"],
|
| 501 |
+
"action_chunk": batch["proposal_target_action_chunks"],
|
| 502 |
+
}
|
| 503 |
+
if "proposal_target_rollout_phase" in batch:
|
| 504 |
+
rollout_target["phase"] = batch["proposal_target_rollout_phase"]
|
| 505 |
+
for optional_key in (
|
| 506 |
+
"proposal_target_rollout_belief_map",
|
| 507 |
+
"proposal_target_rollout_visibility_map",
|
| 508 |
+
"proposal_target_rollout_clearance_map",
|
| 509 |
+
"proposal_target_rollout_support_stability",
|
| 510 |
+
"proposal_target_rollout_reocclusion_target",
|
| 511 |
+
"proposal_target_rollout_occluder_contact_map",
|
| 512 |
+
"proposal_target_rollout_grasp_affordance_map",
|
| 513 |
+
):
|
| 514 |
+
if optional_key in batch:
|
| 515 |
+
rollout_target[optional_key.replace("proposal_target_", "")] = batch[optional_key]
|
| 516 |
+
elif "candidate_rollout_support_mode" in batch:
|
| 517 |
+
rollout_target = {
|
| 518 |
+
"support_mode": batch["candidate_rollout_support_mode"],
|
| 519 |
+
"corridor_feasible": batch["candidate_rollout_corridor_feasible"],
|
| 520 |
+
"persistence_horizon": batch["candidate_rollout_persistence_horizon"],
|
| 521 |
+
"disturbance_cost": batch["candidate_rollout_disturbance_cost"],
|
| 522 |
+
"action_chunk": batch["candidate_action_chunks"],
|
| 523 |
+
}
|
| 524 |
+
if "candidate_rollout_phase" in batch:
|
| 525 |
+
rollout_target["phase"] = batch["candidate_rollout_phase"]
|
| 526 |
+
for optional_key in (
|
| 527 |
+
"candidate_rollout_belief_map",
|
| 528 |
+
"candidate_rollout_visibility_map",
|
| 529 |
+
"candidate_rollout_clearance_map",
|
| 530 |
+
"candidate_rollout_support_stability",
|
| 531 |
+
"candidate_rollout_reocclusion_target",
|
| 532 |
+
"candidate_rollout_occluder_contact_map",
|
| 533 |
+
"candidate_rollout_grasp_affordance_map",
|
| 534 |
+
):
|
| 535 |
+
if optional_key in batch:
|
| 536 |
+
rollout_target[optional_key.replace("candidate_", "")] = batch[optional_key]
|
| 537 |
+
planner_indices = model_output.get("planner_topk_indices")
|
| 538 |
+
if planner_indices is not None:
|
| 539 |
+
for key, value in list(rollout_target.items()):
|
| 540 |
+
if isinstance(value, Tensor) and value.ndim >= 2 and value.shape[1] >= planner_indices.shape[1]:
|
| 541 |
+
expand_indices = planner_indices
|
| 542 |
+
while expand_indices.ndim < value.ndim:
|
| 543 |
+
expand_indices = expand_indices.unsqueeze(-1)
|
| 544 |
+
rollout_target[key] = value.gather(
|
| 545 |
+
1,
|
| 546 |
+
expand_indices.expand(-1, -1, *value.shape[2:]),
|
| 547 |
+
)
|
| 548 |
+
else:
|
| 549 |
+
rollout_target = {
|
| 550 |
+
"support_mode": batch["rollout_support_mode"],
|
| 551 |
+
"corridor_feasible": batch["rollout_corridor_feasible"],
|
| 552 |
+
"persistence_horizon": batch["rollout_persistence_horizon"],
|
| 553 |
+
"disturbance_cost": batch["rollout_disturbance_cost"],
|
| 554 |
+
"action_chunk": batch["action_chunk"],
|
| 555 |
+
}
|
| 556 |
+
if "rollout_phase" in batch:
|
| 557 |
+
rollout_target["phase"] = batch["rollout_phase"]
|
| 558 |
+
for optional_key in (
|
| 559 |
+
"rollout_belief_map",
|
| 560 |
+
"rollout_visibility_map",
|
| 561 |
+
"rollout_clearance_map",
|
| 562 |
+
"rollout_support_stability",
|
| 563 |
+
"rollout_reocclusion_target",
|
| 564 |
+
"rollout_occluder_contact_map",
|
| 565 |
+
"rollout_grasp_affordance_map",
|
| 566 |
+
):
|
| 567 |
+
if optional_key in batch:
|
| 568 |
+
rollout_target[optional_key] = batch[optional_key]
|
| 569 |
+
world_model_loss = world_model_rollout_consistency_loss(
|
| 570 |
+
model_output["planned_rollout"],
|
| 571 |
+
rollout_target,
|
| 572 |
+
)
|
| 573 |
+
if model_output.get("rollout_source", "learned") == "lightweight":
|
| 574 |
+
losses["transition"] = world_model_loss
|
| 575 |
+
losses["world_model"] = model_output["action_mean"].new_tensor(0.0)
|
| 576 |
+
total = total + weights.transition * world_model_loss
|
| 577 |
+
else:
|
| 578 |
+
losses["world_model"] = world_model_loss
|
| 579 |
+
losses["transition"] = model_output["action_mean"].new_tensor(0.0)
|
| 580 |
+
total = total + weights.world_model * world_model_loss
|
| 581 |
+
else:
|
| 582 |
+
losses["world_model"] = model_output["action_mean"].new_tensor(0.0)
|
| 583 |
+
losses["transition"] = model_output["action_mean"].new_tensor(0.0)
|
| 584 |
+
|
| 585 |
+
if "planner_success_logits" in model_output and "candidate_retrieval_success" in batch:
|
| 586 |
+
success_target = batch["candidate_retrieval_success"].float()
|
| 587 |
+
risk_target = batch.get("candidate_risk")
|
| 588 |
+
if risk_target is None:
|
| 589 |
+
risk_target = torch.clamp(
|
| 590 |
+
batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(),
|
| 591 |
+
0.0,
|
| 592 |
+
1.0,
|
| 593 |
+
)
|
| 594 |
+
utility_target = batch.get("candidate_utility")
|
| 595 |
+
planner_indices = model_output.get("planner_topk_indices")
|
| 596 |
+
if planner_indices is not None and success_target.shape[1] != model_output["planner_success_logits"].shape[1]:
|
| 597 |
+
success_target = success_target.gather(1, planner_indices)
|
| 598 |
+
risk_target = risk_target.gather(1, planner_indices)
|
| 599 |
+
if utility_target is not None:
|
| 600 |
+
utility_target = utility_target.gather(1, planner_indices)
|
| 601 |
+
if utility_target is None:
|
| 602 |
+
utility_target = success_target - risk_target
|
| 603 |
+
elif "utility_structured" in model_output:
|
| 604 |
+
utility_target = 0.5 * utility_target + 0.5 * model_output["utility_structured"].detach()
|
| 605 |
+
planner_scores = model_output["planner_scores"].float().clamp(-20.0, 20.0)
|
| 606 |
+
success_loss = F.binary_cross_entropy_with_logits(model_output["planner_success_logits"], success_target)
|
| 607 |
+
risk_loss = F.mse_loss(model_output["planner_risk_values"], risk_target.float())
|
| 608 |
+
pred_diff = planner_scores.unsqueeze(-1) - planner_scores.unsqueeze(-2)
|
| 609 |
+
target_diff = utility_target.float().unsqueeze(-1) - utility_target.float().unsqueeze(-2)
|
| 610 |
+
ranking_mask = target_diff.abs() > 1e-4
|
| 611 |
+
if ranking_mask.any():
|
| 612 |
+
ranking_loss = torch.relu(0.1 - torch.sign(target_diff) * pred_diff)[ranking_mask].mean()
|
| 613 |
+
else:
|
| 614 |
+
ranking_loss = planner_scores.new_tensor(0.0)
|
| 615 |
+
oracle_target = utility_target.argmax(dim=-1)
|
| 616 |
+
oracle_loss = F.cross_entropy(planner_scores, oracle_target)
|
| 617 |
+
ranking_loss = ranking_loss + 0.5 * oracle_loss
|
| 618 |
+
losses["planner_success"] = success_loss
|
| 619 |
+
losses["planner_risk"] = risk_loss
|
| 620 |
+
losses["planner_ranking"] = ranking_loss
|
| 621 |
+
total = (
|
| 622 |
+
total
|
| 623 |
+
+ weights.planner_success * success_loss
|
| 624 |
+
+ weights.planner_risk * risk_loss
|
| 625 |
+
+ weights.planner_ranking * ranking_loss
|
| 626 |
+
)
|
| 627 |
+
else:
|
| 628 |
+
losses["planner_success"] = model_output["action_mean"].new_tensor(0.0)
|
| 629 |
+
losses["planner_risk"] = model_output["action_mean"].new_tensor(0.0)
|
| 630 |
+
losses["planner_ranking"] = model_output["action_mean"].new_tensor(0.0)
|
| 631 |
+
|
| 632 |
+
if "adapter_confidence" in model_output and "state_confidence_target" in batch:
|
| 633 |
+
confidence = model_output["adapter_confidence"]
|
| 634 |
+
if confidence.ndim > 1:
|
| 635 |
+
confidence = confidence.max(dim=-1).values
|
| 636 |
+
with torch.autocast(device_type=confidence.device.type, enabled=False):
|
| 637 |
+
gate_loss = F.binary_cross_entropy(
|
| 638 |
+
confidence.float().clamp(1e-4, 1.0 - 1e-4),
|
| 639 |
+
batch["state_confidence_target"].float(),
|
| 640 |
+
)
|
| 641 |
+
losses["gate"] = gate_loss
|
| 642 |
+
total = total + weights.gate * gate_loss
|
| 643 |
+
else:
|
| 644 |
+
losses["gate"] = model_output["action_mean"].new_tensor(0.0)
|
| 645 |
+
|
| 646 |
+
if "trunk_action_mean" in model_output:
|
| 647 |
+
distill_loss = chunk_bc_loss(
|
| 648 |
+
model_output["action_mean"],
|
| 649 |
+
model_output["trunk_action_mean"].detach(),
|
| 650 |
+
)
|
| 651 |
+
losses["distillation"] = distill_loss
|
| 652 |
+
total = total + weights.distillation * distill_loss
|
| 653 |
+
else:
|
| 654 |
+
losses["distillation"] = model_output["action_mean"].new_tensor(0.0)
|
| 655 |
+
|
| 656 |
+
proposal_target_chunks, proposal_success_values, proposal_risk_values, proposal_utility_values = _proposal_target_batch(batch)
|
| 657 |
+
|
| 658 |
+
if "proposal_candidates" in model_output:
|
| 659 |
+
reconstruction_losses = []
|
| 660 |
+
batch_size = model_output["proposal_candidates"].shape[0]
|
| 661 |
+
task_names = batch.get("task_name")
|
| 662 |
+
for sample_idx in range(batch_size):
|
| 663 |
+
sample_batch: dict[str, Tensor | str] = {}
|
| 664 |
+
for key in ("candidate_action_chunks", "candidate_utility"):
|
| 665 |
+
value = batch.get(key)
|
| 666 |
+
if value is not None:
|
| 667 |
+
sample_batch[key] = value[sample_idx : sample_idx + 1]
|
| 668 |
+
if isinstance(task_names, list):
|
| 669 |
+
sample_batch["task_name"] = str(task_names[sample_idx])
|
| 670 |
+
elif isinstance(task_names, tuple):
|
| 671 |
+
sample_batch["task_name"] = str(task_names[sample_idx])
|
| 672 |
+
elif isinstance(task_names, str):
|
| 673 |
+
sample_batch["task_name"] = task_names
|
| 674 |
+
fallback_sample = (
|
| 675 |
+
proposal_target_chunks[sample_idx : sample_idx + 1]
|
| 676 |
+
if proposal_target_chunks is not None
|
| 677 |
+
else None
|
| 678 |
+
)
|
| 679 |
+
sample_targets = _proposal_reconstruction_targets(
|
| 680 |
+
sample_batch, # type: ignore[arg-type]
|
| 681 |
+
proposal_count=model_output["proposal_candidates"].shape[1],
|
| 682 |
+
fallback_targets=fallback_sample,
|
| 683 |
+
)
|
| 684 |
+
if sample_targets is None:
|
| 685 |
+
continue
|
| 686 |
+
reconstruction_losses.append(
|
| 687 |
+
proposal_set_reconstruction_loss(
|
| 688 |
+
model_output["proposal_candidates"][sample_idx : sample_idx + 1],
|
| 689 |
+
sample_targets,
|
| 690 |
+
)
|
| 691 |
+
)
|
| 692 |
+
if reconstruction_losses:
|
| 693 |
+
proposal_reconstruction = torch.stack(reconstruction_losses).mean()
|
| 694 |
+
else:
|
| 695 |
+
proposal_reconstruction = model_output["action_mean"].new_tensor(0.0)
|
| 696 |
+
losses["proposal_reconstruction"] = proposal_reconstruction
|
| 697 |
+
total = total + weights.proposal_reconstruction * proposal_reconstruction
|
| 698 |
+
else:
|
| 699 |
+
losses["proposal_reconstruction"] = model_output["action_mean"].new_tensor(0.0)
|
| 700 |
+
|
| 701 |
+
if "proposal_logits" in model_output and proposal_success_values is not None:
|
| 702 |
+
candidate_count = min(
|
| 703 |
+
model_output["proposal_logits"].shape[1],
|
| 704 |
+
proposal_success_values.shape[1],
|
| 705 |
+
)
|
| 706 |
+
proposal_logits = model_output["proposal_logits"][:, :candidate_count]
|
| 707 |
+
proposal_success_target = proposal_success_values[:, :candidate_count].float()
|
| 708 |
+
proposal_utility = proposal_utility_values
|
| 709 |
+
if proposal_utility is None:
|
| 710 |
+
proposal_risk = proposal_risk_values
|
| 711 |
+
if proposal_risk is None:
|
| 712 |
+
proposal_risk = torch.clamp(
|
| 713 |
+
batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(),
|
| 714 |
+
0.0,
|
| 715 |
+
1.0,
|
| 716 |
+
)
|
| 717 |
+
proposal_utility = proposal_success_target - proposal_risk[:, :candidate_count]
|
| 718 |
+
else:
|
| 719 |
+
proposal_utility = proposal_utility[:, :candidate_count]
|
| 720 |
+
proposal_success_loss = F.binary_cross_entropy_with_logits(
|
| 721 |
+
proposal_logits,
|
| 722 |
+
proposal_success_target,
|
| 723 |
+
)
|
| 724 |
+
proposal_pred_diff = proposal_logits.unsqueeze(-1) - proposal_logits.unsqueeze(-2)
|
| 725 |
+
proposal_target_diff = proposal_utility.float().unsqueeze(-1) - proposal_utility.float().unsqueeze(-2)
|
| 726 |
+
proposal_mask = proposal_target_diff.abs() > 1e-4
|
| 727 |
+
if proposal_mask.any():
|
| 728 |
+
proposal_ranking_loss = torch.relu(0.1 - torch.sign(proposal_target_diff) * proposal_pred_diff)[
|
| 729 |
+
proposal_mask
|
| 730 |
+
].mean()
|
| 731 |
+
else:
|
| 732 |
+
proposal_ranking_loss = model_output["proposal_logits"].new_tensor(0.0)
|
| 733 |
+
proposal_oracle_target = proposal_utility.argmax(dim=-1)
|
| 734 |
+
proposal_oracle_loss = F.cross_entropy(proposal_logits, proposal_oracle_target)
|
| 735 |
+
proposal_ranking_loss = proposal_ranking_loss + 0.5 * proposal_oracle_loss
|
| 736 |
+
losses["proposal_success"] = proposal_success_loss
|
| 737 |
+
losses["proposal_ranking"] = proposal_ranking_loss
|
| 738 |
+
total = (
|
| 739 |
+
total
|
| 740 |
+
+ weights.proposal_success * proposal_success_loss
|
| 741 |
+
+ weights.proposal_ranking * proposal_ranking_loss
|
| 742 |
+
)
|
| 743 |
+
else:
|
| 744 |
+
losses["proposal_success"] = model_output["action_mean"].new_tensor(0.0)
|
| 745 |
+
losses["proposal_ranking"] = model_output["action_mean"].new_tensor(0.0)
|
| 746 |
+
|
| 747 |
+
if (
|
| 748 |
+
"proposal_mode_logits" in model_output
|
| 749 |
+
and "proposal_mode_assignments" in model_output
|
| 750 |
+
and proposal_success_values is not None
|
| 751 |
+
):
|
| 752 |
+
candidate_count = min(
|
| 753 |
+
proposal_success_values.shape[1],
|
| 754 |
+
proposal_utility_values.shape[1] if proposal_utility_values is not None else proposal_success_values.shape[1],
|
| 755 |
+
model_output["proposal_mode_assignments"].numel(),
|
| 756 |
+
)
|
| 757 |
+
proposal_success_target = proposal_success_values[:, :candidate_count].float()
|
| 758 |
+
proposal_utility = proposal_utility_values
|
| 759 |
+
if proposal_utility is None:
|
| 760 |
+
proposal_risk = proposal_risk_values
|
| 761 |
+
if proposal_risk is None:
|
| 762 |
+
proposal_risk = torch.clamp(
|
| 763 |
+
batch["candidate_final_disturbance_cost"].float() + batch["candidate_reocclusion_rate"].float(),
|
| 764 |
+
0.0,
|
| 765 |
+
1.0,
|
| 766 |
+
)
|
| 767 |
+
proposal_utility = proposal_success_target - proposal_risk[:, :candidate_count]
|
| 768 |
+
else:
|
| 769 |
+
proposal_utility = proposal_utility[:, :candidate_count].float()
|
| 770 |
+
mode_success_target, mode_utility_target = _proposal_mode_targets(
|
| 771 |
+
model_output["proposal_mode_assignments"],
|
| 772 |
+
proposal_success=proposal_success_target,
|
| 773 |
+
proposal_utility=proposal_utility,
|
| 774 |
+
num_modes=model_output["proposal_mode_logits"].shape[1],
|
| 775 |
+
)
|
| 776 |
+
proposal_mode_logits = model_output["proposal_mode_logits"]
|
| 777 |
+
proposal_mode_success_loss = F.binary_cross_entropy_with_logits(
|
| 778 |
+
proposal_mode_logits,
|
| 779 |
+
mode_success_target,
|
| 780 |
+
reduction="none",
|
| 781 |
+
)
|
| 782 |
+
proposal_mode_success_loss = proposal_mode_success_loss.mean(dim=-1)
|
| 783 |
+
proposal_mode_pred_diff = proposal_mode_logits.unsqueeze(-1) - proposal_mode_logits.unsqueeze(-2)
|
| 784 |
+
proposal_mode_target_diff = mode_utility_target.unsqueeze(-1) - mode_utility_target.unsqueeze(-2)
|
| 785 |
+
proposal_mode_mask = proposal_mode_target_diff.abs() > 1e-4
|
| 786 |
+
proposal_mode_ranking_terms = torch.relu(
|
| 787 |
+
0.1 - torch.sign(proposal_mode_target_diff) * proposal_mode_pred_diff
|
| 788 |
+
)
|
| 789 |
+
proposal_mode_ranking_den = proposal_mode_mask.sum(dim=(-1, -2)).clamp_min(1)
|
| 790 |
+
proposal_mode_ranking_loss = (proposal_mode_ranking_terms * proposal_mode_mask).sum(dim=(-1, -2)) / proposal_mode_ranking_den
|
| 791 |
+
proposal_mode_ranking_loss = torch.where(
|
| 792 |
+
proposal_mode_mask.any(dim=(-1, -2)),
|
| 793 |
+
proposal_mode_ranking_loss,
|
| 794 |
+
proposal_mode_logits.new_zeros(proposal_mode_ranking_loss.shape),
|
| 795 |
+
)
|
| 796 |
+
proposal_mode_oracle_target = mode_utility_target.argmax(dim=-1)
|
| 797 |
+
proposal_mode_oracle_loss = F.cross_entropy(
|
| 798 |
+
proposal_mode_logits,
|
| 799 |
+
proposal_mode_oracle_target,
|
| 800 |
+
reduction="none",
|
| 801 |
+
)
|
| 802 |
+
proposal_mode_loss_per_sample = (
|
| 803 |
+
proposal_mode_success_loss
|
| 804 |
+
+ proposal_mode_ranking_loss
|
| 805 |
+
+ 0.5 * proposal_mode_oracle_loss
|
| 806 |
+
)
|
| 807 |
+
task_filter = weights.proposal_mode_task_filter
|
| 808 |
+
if task_filter:
|
| 809 |
+
filtered_mask = _task_name_mask_for_values(
|
| 810 |
+
batch,
|
| 811 |
+
task_names=list(task_filter),
|
| 812 |
+
batch_size=proposal_mode_loss_per_sample.shape[0],
|
| 813 |
+
device=proposal_mode_loss_per_sample.device,
|
| 814 |
+
)
|
| 815 |
+
if filtered_mask is not None and filtered_mask.any():
|
| 816 |
+
proposal_mode_loss = proposal_mode_loss_per_sample[filtered_mask].mean()
|
| 817 |
+
else:
|
| 818 |
+
proposal_mode_loss = proposal_mode_logits.new_tensor(0.0)
|
| 819 |
+
elif weights.proposal_mode_cloth_only:
|
| 820 |
+
cloth_mask = _task_name_mask(
|
| 821 |
+
batch,
|
| 822 |
+
task_name="cloth",
|
| 823 |
+
batch_size=proposal_mode_loss_per_sample.shape[0],
|
| 824 |
+
device=proposal_mode_loss_per_sample.device,
|
| 825 |
+
)
|
| 826 |
+
if cloth_mask is not None and cloth_mask.any():
|
| 827 |
+
proposal_mode_loss = proposal_mode_loss_per_sample[cloth_mask].mean()
|
| 828 |
+
else:
|
| 829 |
+
proposal_mode_loss = proposal_mode_logits.new_tensor(0.0)
|
| 830 |
+
else:
|
| 831 |
+
proposal_mode_loss = proposal_mode_loss_per_sample.mean()
|
| 832 |
+
losses["proposal_mode"] = proposal_mode_loss
|
| 833 |
+
total = total + weights.proposal_mode * proposal_mode_loss
|
| 834 |
+
else:
|
| 835 |
+
losses["proposal_mode"] = model_output["action_mean"].new_tensor(0.0)
|
| 836 |
+
|
| 837 |
+
if "proposal_candidates" in model_output:
|
| 838 |
+
diversity_loss = proposal_diversity_loss(model_output["proposal_candidates"])
|
| 839 |
+
losses["proposal_diversity"] = diversity_loss
|
| 840 |
+
total = total + weights.proposal_diversity * diversity_loss
|
| 841 |
+
else:
|
| 842 |
+
losses["proposal_diversity"] = model_output["action_mean"].new_tensor(0.0)
|
| 843 |
+
|
| 844 |
+
if "equivariance_probe_action_mean" in model_output and "equivariance_target_action_mean" in model_output:
|
| 845 |
+
swap_loss = role_swap_consistency_loss(
|
| 846 |
+
model_output["equivariance_probe_action_mean"],
|
| 847 |
+
model_output["equivariance_target_action_mean"].detach(),
|
| 848 |
+
)
|
| 849 |
+
losses["role_swap_consistency"] = swap_loss
|
| 850 |
+
total = total + weights.role_swap_consistency * swap_loss
|
| 851 |
+
else:
|
| 852 |
+
losses["role_swap_consistency"] = model_output["action_mean"].new_tensor(0.0)
|
| 853 |
+
|
| 854 |
+
losses["total"] = total
|
| 855 |
+
return losses
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/train/trainer.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from dataclasses import dataclass
|
| 4 |
+
from typing import Sequence
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
from torch import Tensor, nn
|
| 8 |
+
|
| 9 |
+
from models.policy import (
|
| 10 |
+
AdapterWrappedPolicy,
|
| 11 |
+
BackboneOnlyPolicy,
|
| 12 |
+
ElasticRevealBimanualPolicy,
|
| 13 |
+
FoundationTrunkPolicy,
|
| 14 |
+
InteractionBimanualPolicy,
|
| 15 |
+
PolicyConfig,
|
| 16 |
+
RevealBimanualPolicy,
|
| 17 |
+
)
|
| 18 |
+
from train.losses import LossWeights, compute_total_loss
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
@dataclass
class TrainerConfig:
    """Knobs controlling policy construction and the train/eval loop."""

    # Which policy variant build_policy() instantiates.
    policy_type: str = "backbone_only"
    training_regime: str = "adapter_train_frozen_trunk"
    eval_mode: str = "adapter_active"
    # Mixed precision: autocast to bfloat16 when CUDA is available.
    use_bf16: bool = True
    grad_clip_norm: float = 1.0
    freeze_backbone: bool = True
    gradient_checkpointing: bool = True
    # Planner gating; only honored by planning-capable policy types
    # (see policy_supports_planning / planner_enabled).
    plan_during_train: bool = True
    plan_during_eval: bool = True
    support_mode_conditioning: bool = True
    # "off" disables planning; "proxy_pretrained" freezes planner modules
    # (see apply_planner_mode).
    planner_mode: str = "trainable"
    use_depth: bool = True
    use_world_model: bool = True
    use_role_tokens: bool = True
    history_steps_override: int | None = None
    compute_equivariance_probe: bool = True
    # When non-empty, only parameters whose names start with one of these
    # prefixes stay trainable (see apply_trainable_parameter_prefixes).
    trainable_parameter_prefixes: tuple[str, ...] = ()
    # Options forwarded only to the adapter_wrapped policy.
    adapter_mode: str = "adapter_active"
    adapter_use_transition_model: bool = True
    adapter_use_task_conditioning: bool = True
    # "trunk" supervises the action loss on the trunk head instead of the
    # adapter-selected head (see BimanualTrainer.training_step).
    adapter_action_supervision_source: str = "selected"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def build_policy(config: PolicyConfig, trainer_config: TrainerConfig) -> nn.Module:
    """Instantiate the policy variant named by ``trainer_config.policy_type``.

    Backbone freezing and gradient checkpointing flags are copied from the
    trainer config onto the policy config before construction.  Unknown
    policy types fall back to ``BackboneOnlyPolicy``.
    """
    config.backbone.freeze_backbone = trainer_config.freeze_backbone
    config.backbone.gradient_checkpointing = trainer_config.gradient_checkpointing
    policy_classes = {
        "adapter_wrapped": AdapterWrappedPolicy,
        "foundation_trunk": FoundationTrunkPolicy,
        "elastic_reveal": ElasticRevealBimanualPolicy,
        "reveal_state": RevealBimanualPolicy,
        "interaction_state": InteractionBimanualPolicy,
    }
    policy_cls = policy_classes.get(trainer_config.policy_type, BackboneOnlyPolicy)
    return policy_cls(config)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def policy_supports_planning(policy_type: str) -> bool:
    """Return True when the given policy variant exposes a planner path."""
    planning_capable = ("reveal_state", "interaction_state", "elastic_reveal")
    return policy_type in planning_capable
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def planner_enabled(trainer_config: TrainerConfig, during_eval: bool) -> bool:
    """Decide whether the planner should run for the current phase.

    Planning is off for non-planning policy types and when ``planner_mode``
    is "off"; otherwise the phase-specific flag decides.
    """
    if not policy_supports_planning(trainer_config.policy_type):
        return False
    if trainer_config.planner_mode == "off":
        return False
    return trainer_config.plan_during_eval if during_eval else trainer_config.plan_during_train
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def apply_planner_mode(model: nn.Module, trainer_config: TrainerConfig) -> list[str]:
    """Freeze planner-related submodules for the proxy-pretrained regime.

    A no-op (returning ``[]``) unless ``planner_mode == "proxy_pretrained"``.
    Otherwise the ``interaction_head``, ``world_model`` and ``planner``
    submodules that exist on ``model`` get all parameters frozen, and the
    names of the frozen modules are returned.
    """
    if trainer_config.planner_mode != "proxy_pretrained":
        return []
    frozen_modules: list[str] = []
    for module_name in ("interaction_head", "world_model", "planner"):
        submodule = getattr(model, module_name, None)
        if submodule is None:
            continue
        frozen_modules.append(module_name)
        for parameter in submodule.parameters():
            parameter.requires_grad = False
    return frozen_modules
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def apply_trainable_parameter_prefixes(model: nn.Module, trainer_config: TrainerConfig) -> list[str]:
    """Restrict training to parameters whose names match a configured prefix.

    With no prefixes configured this is a no-op that returns ``[]``.
    Otherwise every parameter's ``requires_grad`` is set (True only for
    matching names) and the names left trainable are returned.
    """
    prefixes = tuple(str(prefix) for prefix in trainer_config.trainable_parameter_prefixes)
    if not prefixes:
        return []
    trainable_names: list[str] = []
    for name, parameter in model.named_parameters():
        # str.startswith accepts a tuple: matches any configured prefix.
        keep = name.startswith(prefixes)
        parameter.requires_grad = keep
        if keep:
            trainable_names.append(name)
    return trainable_names
|
| 101 |
+
|
| 102 |
+
|
| 103 |
+
class BimanualTrainer:
    """One-step trainer: forward under autocast, loss, backward, clip, step."""

    def __init__(self, model: nn.Module, optimizer: torch.optim.Optimizer, config: TrainerConfig) -> None:
        self.model = model
        self.optimizer = optimizer
        self.config = config

    def _autocast_context(self) -> torch.autocast:
        # bf16 autocast only when requested AND CUDA is present; otherwise a
        # disabled CPU autocast so the call site stays uniform.
        if self.config.use_bf16 and torch.cuda.is_available():
            return torch.autocast(device_type="cuda", dtype=torch.bfloat16)
        return torch.autocast(device_type="cpu", enabled=False)

    def training_step(self, batch: dict[str, Tensor | Sequence[str]], loss_weights: LossWeights | None = None) -> dict[str, Tensor]:
        """Run one optimization step on ``batch`` and return the loss dict.

        Builds per-policy-type forward kwargs, runs the model, computes the
        combined loss via ``compute_total_loss``, then backprops, clips
        gradients and steps the optimizer.  The returned dict is whatever
        ``compute_total_loss`` produced (including the "total" entry).
        """
        self.optimizer.zero_grad(set_to_none=True)
        images = batch["images"]
        proprio = batch["proprio"]
        texts = batch.get("texts")
        language_tokens = batch.get("language_tokens")
        with self._autocast_context():
            task_names = batch.get("task_name")
            # Pass only genuine sequences through; a bare string would be
            # mis-iterated per character by downstream code.
            texts_value = texts if isinstance(texts, Sequence) and not isinstance(texts, str) else None
            task_name_value = task_names if isinstance(task_names, Sequence) and not isinstance(task_names, str) else None
            forward_kwargs = {
                "images": images,
                "proprio": proprio,
                "texts": texts_value,
                "task_names": task_name_value,
                "task_ids": batch.get("task_id"),
                "language_tokens": language_tokens if isinstance(language_tokens, dict) else None,
                "history_images": batch.get("history_images"),
                "history_proprio": batch.get("history_proprio"),
                "history_actions": batch.get("history_actions"),
                "depths": batch.get("depths"),
                "depth_valid": batch.get("depth_valid"),
                "camera_intrinsics": batch.get("camera_intrinsics"),
                "camera_extrinsics": batch.get("camera_extrinsics"),
                "history_depths": batch.get("history_depths"),
                "history_depth_valid": batch.get("history_depth_valid"),
                "history_camera_intrinsics": batch.get("history_camera_intrinsics"),
                "history_camera_extrinsics": batch.get("history_camera_extrinsics"),
            }
            # Planner-capable policies additionally take planning kwargs.
            if policy_supports_planning(self.config.policy_type):
                forward_kwargs["plan"] = planner_enabled(self.config, during_eval=False)
                forward_kwargs["support_mode_conditioning"] = self.config.support_mode_conditioning
                if "candidate_action_chunks" in batch:
                    forward_kwargs["candidate_chunks_override"] = batch["candidate_action_chunks"]
            if self.config.policy_type == "adapter_wrapped":
                forward_kwargs["adapter_mode"] = self.config.adapter_mode
                forward_kwargs["use_transition_model"] = self.config.adapter_use_transition_model
                forward_kwargs["use_task_conditioning"] = self.config.adapter_use_task_conditioning
            if self.config.policy_type == "elastic_reveal":
                forward_kwargs["depths"] = batch.get("depths")
                forward_kwargs["depth_valid"] = batch.get("depth_valid")
                forward_kwargs["camera_intrinsics"] = batch.get("camera_intrinsics")
                forward_kwargs["camera_extrinsics"] = batch.get("camera_extrinsics")
                forward_kwargs["history_depths"] = batch.get("history_depths")
                forward_kwargs["history_depth_valid"] = batch.get("history_depth_valid")
                forward_kwargs["history_camera_intrinsics"] = batch.get("history_camera_intrinsics")
                forward_kwargs["history_camera_extrinsics"] = batch.get("history_camera_extrinsics")
                forward_kwargs["history_camera_valid_mask"] = batch.get("history_camera_valid_mask")
                forward_kwargs["use_depth"] = self.config.use_depth
                forward_kwargs["use_world_model"] = self.config.use_world_model
                forward_kwargs["use_planner"] = planner_enabled(self.config, during_eval=False)
                forward_kwargs["use_role_tokens"] = self.config.use_role_tokens
                forward_kwargs["history_steps_override"] = self.config.history_steps_override
                forward_kwargs["compute_equivariance_probe"] = self.config.compute_equivariance_probe
            elif self.config.policy_type == "interaction_state":
                forward_kwargs["use_role_tokens"] = self.config.use_role_tokens
                forward_kwargs["history_steps_override"] = self.config.history_steps_override
            model_output = self.model(**forward_kwargs)
            # Optionally supervise the action loss on the trunk head instead
            # of the adapter-selected head; the selected head is preserved
            # under "selected_action_mean".
            if (
                self.config.policy_type == "adapter_wrapped"
                and self.config.adapter_action_supervision_source == "trunk"
                and "trunk_action_mean" in model_output
            ):
                model_output = dict(model_output)
                model_output["selected_action_mean"] = model_output["action_mean"]
                model_output["action_mean"] = model_output["trunk_action_mean"]
            losses = compute_total_loss(model_output, batch, weights=loss_weights)
        # NOTE(review): source formatting was flattened; backward/clip/step
        # are placed outside the autocast context here — confirm against the
        # original indentation.
        losses["total"].backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config.grad_clip_norm)
        self.optimizer.step()
        return losses
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_summary.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
|
| 3 |
+
from eval.public_benchmark_package import build_public_eval_protocol, build_target_training_spec
|
| 4 |
+
from eval.run_public_benchmark_package import summarize_public_benchmark_package
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
TARGET_TRACK_IDS = [
|
| 8 |
+
"rlbench2_put_bottle_in_fridge",
|
| 9 |
+
"rlbench2_take_out_tray",
|
| 10 |
+
"rlbench2_take_shoes_out_of_box",
|
| 11 |
+
"rlbench2_lift_tray",
|
| 12 |
+
"rlbench2_straighten_rope",
|
| 13 |
+
"rlbench2_sweep_to_dustpan",
|
| 14 |
+
"dexgarmentlab_store_tops",
|
| 15 |
+
"dexgarmentlab_fold_tops",
|
| 16 |
+
"dexgarmentlab_hang_coat",
|
| 17 |
+
]
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
def _successes(num_success: int, total: int = 100) -> list[int]:
|
| 21 |
+
return [1] * num_success + [0] * (total - num_success)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _target_record(track_id: str, adapter_mode: str, seed: int, num_success: int, *, intervention: float, non_base: float) -> dict:
    """Build one synthetic per-track eval record for the given adapter mode."""
    successes = _successes(num_success)
    episode_count = len(successes)
    record = {
        "track_id": track_id,
        "adapter_mode": adapter_mode,
        "successes": successes,
        "success_rate": sum(successes) / episode_count,
        "episodes": episode_count,
        "seed": seed,
        "eval_protocol": build_public_eval_protocol(
            track_id=track_id, eval_mode=adapter_mode, seed=seed, episodes=episode_count
        ),
        "intervention_rate": intervention,
        "non_base_selection_rate": non_base,
        "steps_to_first_reveal_or_access": 8.0,
        "steps_to_retrieve": 22.0,
        "disturbance_proxy": 0.3,
    }
    # adapter_noop deliberately shares the adapter_active_ft training spec.
    variant = "adapter_active_ft" if adapter_mode == "adapter_noop" else adapter_mode
    record["train_spec"] = build_target_training_spec(track_id=track_id, model_variant=variant, seed=seed)
    return record
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _hybrid_payloads() -> list[dict]:
    """Synthesize trunk / noop / active records for every target track."""
    trunk_scores = [35, 30, 27, 28, 26, 32, 24, 22, 27]
    active_scores = [75, 68, 59, 60, 58, 64, 57, 55, 62]
    interventions = [0.30, 0.24, 0.19, 0.18, 0.20, 0.19, 0.22, 0.21, 0.25]
    non_base = [0.40, 0.22, 0.21, 0.20, 0.19, 0.18, 0.23, 0.24, 0.26]
    payloads: list[dict] = []
    for index, track_id in enumerate(TARGET_TRACK_IDS):
        baseline = trunk_scores[index]
        # Baseline modes share the trunk score and show no interventions.
        for mode in ("trunk_only_ft", "adapter_noop"):
            payloads.append(
                _target_record(track_id, mode, 17, baseline, intervention=0.0, non_base=0.0)
            )
        payloads.append(
            _target_record(
                track_id,
                "adapter_active_ft",
                17,
                active_scores[index],
                intervention=interventions[index],
                non_base=non_base[index],
            )
        )
    return payloads
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def test_public_benchmark_package_summary_passes_with_clear_gain():
    """A battery with a clear active-vs-trunk gain passes every gate."""
    summary = summarize_public_benchmark_package(_hybrid_payloads(), bootstrap_samples=200, bootstrap_seed=0)

    assert summary["headline_pass"]
    assert summary["sign_of_life_pass"]
    assert summary["anchor_pass"]
    assert summary["sign_of_life_track_count"] == len(TARGET_TRACK_IDS)
    for probe_track in ("rlbench2_put_bottle_in_fridge", "dexgarmentlab_hang_coat"):
        assert summary["tracks"][probe_track]["delta_active_vs_trunk"] > 0.0
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def test_public_benchmark_package_detects_training_mismatch():
    """Corrupting one record's training spec must trip the fairness check."""
    payloads = _hybrid_payloads()
    payloads[8]["train_spec"]["batch_size"] = 64

    with pytest.raises(ValueError, match="Training fairness mismatch"):
        summarize_public_benchmark_package(payloads, bootstrap_samples=50, bootstrap_seed=0)
summarize_public_benchmark_package(payloads, bootstrap_samples=50, bootstrap_seed=0)
|
code/VLAarchtests4/VLAarchtests2_code/VLAarchtests/tests/test_public_benchmark_package_tracks.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from eval.public_benchmark_package import (
|
| 2 |
+
ANCHOR_ROLE,
|
| 3 |
+
TARGET_ROLE,
|
| 4 |
+
build_public_eval_protocol,
|
| 5 |
+
build_target_training_spec,
|
| 6 |
+
default_public_benchmark_manifest,
|
| 7 |
+
expected_eval_modes,
|
| 8 |
+
public_benchmark_tracks,
|
| 9 |
+
public_protocol_identity_signature,
|
| 10 |
+
training_fairness_signature,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
TARGET_TRACK_IDS = [
|
| 15 |
+
"rlbench2_put_bottle_in_fridge",
|
| 16 |
+
"rlbench2_take_out_tray",
|
| 17 |
+
"rlbench2_take_shoes_out_of_box",
|
| 18 |
+
"rlbench2_lift_tray",
|
| 19 |
+
"rlbench2_straighten_rope",
|
| 20 |
+
"rlbench2_sweep_to_dustpan",
|
| 21 |
+
"dexgarmentlab_store_tops",
|
| 22 |
+
"dexgarmentlab_fold_tops",
|
| 23 |
+
"dexgarmentlab_hang_coat",
|
| 24 |
+
]
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def test_public_benchmark_package_contains_expected_tracks():
    """Default manifest names the package and lists exactly the target tracks."""
    manifest = default_public_benchmark_manifest()

    expected = {
        "package_name": "public_hybrid_bimanual_benchmark_v1",
        "target_track_ids": TARGET_TRACK_IDS,
        "anchor_track_ids": [],
    }
    for key, value in expected.items():
        assert manifest[key] == value
    assert manifest["thresholds"]["anchor_tolerance"] == 0.02
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def test_public_target_protocol_identity_is_mode_invariant():
    """Every eval mode of a track must share one protocol identity signature."""
    track = "rlbench2_put_bottle_in_fridge"
    protocol_signatures = set()
    for mode in expected_eval_modes(track):
        protocol = build_public_eval_protocol(track_id=track, eval_mode=mode, seed=17)
        protocol_signatures.add(public_protocol_identity_signature(protocol))

    assert len(protocol_signatures) == 1
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def test_public_package_has_no_anchor_tracks_in_the_current_hybrid_battery():
    """The current hybrid battery defines target tracks only."""
    anchor_tracks = public_benchmark_tracks(ANCHOR_ROLE)
    assert anchor_tracks == []
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def test_training_fairness_signature_matches_for_trunk_and_adapter():
    """Trunk-only and adapter-active training specs share a fairness signature."""
    specs = [
        build_target_training_spec(track_id="dexgarmentlab_hang_coat", model_variant=variant, seed=17)
        for variant in ("trunk_only_ft", "adapter_active_ft")
    ]

    assert training_fairness_signature(specs[0]) == training_fairness_signature(specs[1])
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def test_public_track_roles_are_partitioned():
    """Role queries return all target tracks and an empty anchor set."""
    def roles_by_track(role):
        # Map each track id to its declared role for the requested role set.
        return {track.track_id: track.role for track in public_benchmark_tracks(role)}

    assert roles_by_track(TARGET_ROLE) == {track_id: TARGET_ROLE for track_id in TARGET_TRACK_IDS}
    assert roles_by_track(ANCHOR_ROLE) == {}
|
code/VLAarchtests4_root/MODEL_AND_ARTIFACT_INDEX.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Model And Artifact Index
|
| 2 |
+
|
| 3 |
+
## Main Code Roots
|
| 4 |
+
|
| 5 |
+
- `code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/`
|
| 6 |
+
- `code/VLAarchtests2_code/VLAarchtests/tests/`
|
| 7 |
+
|
| 8 |
+
## Main Prior Handoff Roots
|
| 9 |
+
|
| 10 |
+
- `artifacts/`
|
| 11 |
+
- `docs/`
|
| 12 |
+
- `legacy/`
|
| 13 |
+
- `setup/`
|
| 14 |
+
|
| 15 |
+
## Main Current Public Benchmark Roots
|
| 16 |
+
|
| 17 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/`
|
| 18 |
+
- strongest current dense-occlusion result
|
| 19 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref_gifs/`
|
| 20 |
+
- side-by-side visual rerender of the final dense-occlusion held-out benchmark
|
| 21 |
+
- `reports/maniskill_bag_bridge_eval_less_bonus_seed17/`
|
| 22 |
+
- `reports/maniskill_bag_bridge_eval_less_bonus_seed23/`
|
| 23 |
+
- `reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json`
|
| 24 |
+
- `reports/maniskill_cloth_bridge_smoke_v1/`
|
| 25 |
+
- `reports/maniskill_cloth_bridge_val_sweep_seed23/`
|
| 26 |
+
|
| 27 |
+
## Main Current Checkpoint Roots
|
| 28 |
+
|
| 29 |
+
- `outputs/maniskill_pickclutter_smoke_v5/`
|
| 30 |
+
- `outputs/maniskill_bag_bridge_smoke_v1/`
|
| 31 |
+
- `outputs/maniskill_cloth_bridge_smoke_v1/`
|
| 32 |
+
|
| 33 |
+
## Main Current Dataset Roots
|
| 34 |
+
|
| 35 |
+
- `data/maniskill_pickclutter/`
|
| 36 |
+
- `data/maniskill_bridge_retrieval/`
|
| 37 |
+
- `data/reveal_proxy/`
|
| 38 |
+
|
| 39 |
+
## Main Current Docs
|
| 40 |
+
|
| 41 |
+
- `docs/maniskill_pickclutter_correction_log_2026-04-01.md`
|
| 42 |
+
- `docs/public_bridge_smoke_run_log_2026-04-01.md`
|
| 43 |
+
- `docs/minimum_sign_of_life_maniskill_pickclutter_run_2026-04-01.md`
|
| 44 |
+
|
| 45 |
+
## Main Current Render Code
|
| 46 |
+
|
| 47 |
+
- `code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/render_maniskill_pickclutter_benchmark_gifs.py`
|
| 48 |
+
|
| 49 |
+
## Repo History
|
| 50 |
+
|
| 51 |
+
- `history/VLAarchtests_previous_README.md`
|
| 52 |
+
- `history/VLAarchtests2_previous_README.md`
|
| 53 |
+
- `history/VLAarchtests3_previous_README.md`
|
code/VLAarchtests4_root/PUBLIC_BENCHMARK_RESULTS.md
ADDED
|
@@ -0,0 +1,217 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Public Benchmark Results
|
| 2 |
+
|
| 3 |
+
All dates below refer to `2026-04-01 UTC`.
|
| 4 |
+
|
| 5 |
+
## Dense Occluded Retrieval Proxy
|
| 6 |
+
|
| 7 |
+
Benchmark:
|
| 8 |
+
|
| 9 |
+
- ManiSkill `PickClutterYCB-v1`
|
| 10 |
+
|
| 11 |
+
### Completed runs
|
| 12 |
+
|
| 13 |
+
- `reports/maniskill_pickclutter_smoke/public_benchmark_package_summary.json`
|
| 14 |
+
- `trunk = 0.04`
|
| 15 |
+
- `noop = 0.04`
|
| 16 |
+
- `active = 0.04`
|
| 17 |
+
- `reports/maniskill_pickclutter_smoke_v2/public_benchmark_package_summary.json`
|
| 18 |
+
- `trunk = 0.04`
|
| 19 |
+
- `noop = 0.32`
|
| 20 |
+
- `active = 0.32`
|
| 21 |
+
- not adapter-specific because `active == noop`
|
| 22 |
+
- `reports/maniskill_pickclutter_smoke_v3/public_benchmark_package_summary.json`
|
| 23 |
+
- `trunk = 0.06`
|
| 24 |
+
- `noop = 0.06`
|
| 25 |
+
- `active = 0.06`
|
| 26 |
+
- `reports/maniskill_pickclutter_smoke_v4/public_benchmark_package_summary.json`
|
| 27 |
+
- `trunk = 0.48`
|
| 28 |
+
- `noop = 0.04`
|
| 29 |
+
- `active = 0.04`
|
| 30 |
+
- active intervened but regressed badly
|
| 31 |
+
- `reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/public_benchmark_package_summary.json`
|
| 32 |
+
- `trunk = 0.06`
|
| 33 |
+
- `noop = 0.06`
|
| 34 |
+
- `active = 0.62`
|
| 35 |
+
- `delta = +0.56`
|
| 36 |
+
- eval-probe only, not a clean retrain
|
| 37 |
+
- `reports/maniskill_pickclutter_smoke_v5/public_benchmark_package_summary.json`
|
| 38 |
+
- `trunk = 0.04`
|
| 39 |
+
- `noop = 0.04`
|
| 40 |
+
- `active = 0.04`
|
| 41 |
+
- fairness-preserving retrain, but active still failed
|
| 42 |
+
- `reports/maniskill_pickclutter_smoke_v5_val_sweep/summary.json`
|
| 43 |
+
- val-only planner sweep
|
| 44 |
+
- `baseline_corrected = 0.00`
|
| 45 |
+
- `soft_pref = 0.00`
|
| 46 |
+
- `softer_pref = 0.625`
|
| 47 |
+
- `retrieve_open = 0.625`
|
| 48 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
|
| 49 |
+
- `trunk = 0.04`
|
| 50 |
+
- `noop = 0.04`
|
| 51 |
+
- `active = 0.62`
|
| 52 |
+
- `delta = +0.58`
|
| 53 |
+
- `95% CI = [0.44, 0.72]`
|
| 54 |
+
- `intervention_rate = 1.0`
|
| 55 |
+
- `non_base_selection_rate = 1.0`
|
| 56 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref_gifs/`
|
| 57 |
+
- full rerender of all `50` held-out seeds for `trunk_only_ft` and `adapter_active_ft`
|
| 58 |
+
- includes `index.html`, `INDEX.md`, and `manifest.json`
|
| 59 |
+
- rerender manifest reports `0` success mismatches against the saved benchmark json files
|
| 60 |
+
|
| 61 |
+
### Exact `smoke_v5` eval tuning carried to held-out
|
| 62 |
+
|
| 63 |
+
- `mode_preference_bonus = 0.75`
|
| 64 |
+
- `premature_retrieve_penalty = 0.5`
|
| 65 |
+
- `premature_insert_penalty = 0.25`
|
| 66 |
+
- `premature_maintain_penalty = 1.0`
|
| 67 |
+
- `occlusion_maintain_gap_min_access = 0.30`
|
| 68 |
+
- `occlusion_maintain_gap_min_visibility = 0.20`
|
| 69 |
+
- `retrieve_stage_access_threshold = 0.18`
|
| 70 |
+
- `retrieve_stage_reveal_threshold = 0.18`
|
| 71 |
+
- `retrieve_stage_support_threshold = 0.18`
|
| 72 |
+
|
| 73 |
+
## Bag Retrieval Proxy
|
| 74 |
+
|
| 75 |
+
Benchmark:
|
| 76 |
+
|
| 77 |
+
- ManiSkill public bridge basket retrieval proxy
|
| 78 |
+
|
| 79 |
+
### Completed runs
|
| 80 |
+
|
| 81 |
+
- `reports/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed17.json`
|
| 82 |
+
- `0.32`
|
| 83 |
+
- `reports/maniskill_bag_bridge_smoke_v1/adapter_noop_seed17.json`
|
| 84 |
+
- `0.00`
|
| 85 |
+
- `reports/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed17.json`
|
| 86 |
+
- `0.48`
|
| 87 |
+
|
| 88 |
+
- `reports/maniskill_bag_bridge_smoke_v1/trunk_only_ft_seed23.json`
|
| 89 |
+
- `0.48`
|
| 90 |
+
- `reports/maniskill_bag_bridge_smoke_v1/adapter_noop_seed23.json`
|
| 91 |
+
- `0.08`
|
| 92 |
+
- `reports/maniskill_bag_bridge_smoke_v1/adapter_active_ft_seed23.json`
|
| 93 |
+
- `0.00`
|
| 94 |
+
|
| 95 |
+
### Seed-23 validation sweep
|
| 96 |
+
|
| 97 |
+
- `reports/maniskill_bag_bridge_val_sweep_seed23/summary.json`
|
| 98 |
+
|
| 99 |
+
Configs:
|
| 100 |
+
|
| 101 |
+
- `default`
|
| 102 |
+
- `trunk = 0.125`
|
| 103 |
+
- `noop = 0.125`
|
| 104 |
+
- `active = 0.00`
|
| 105 |
+
- `less_bonus`
|
| 106 |
+
- `trunk = 0.125`
|
| 107 |
+
- `noop = 0.125`
|
| 108 |
+
- `active = 0.125`
|
| 109 |
+
- intervention preserved
|
| 110 |
+
- `conservative`
|
| 111 |
+
- `trunk = 0.125`
|
| 112 |
+
- `noop = 0.125`
|
| 113 |
+
- `active = 0.125`
|
| 114 |
+
- intervention effectively disabled
|
| 115 |
+
- `low_bonus_high_thresh`
|
| 116 |
+
- `trunk = 0.125`
|
| 117 |
+
- `noop = 0.125`
|
| 118 |
+
- `active = 0.125`
|
| 119 |
+
- intervention effectively disabled
|
| 120 |
+
|
| 121 |
+
### Corrected held-out evals
|
| 122 |
+
|
| 123 |
+
- `reports/maniskill_bag_bridge_eval_less_bonus_seed17/public_benchmark_package_summary.json`
|
| 124 |
+
- `trunk = 0.32`
|
| 125 |
+
- `noop = 0.00`
|
| 126 |
+
- `active = 0.48`
|
| 127 |
+
- `delta = +0.16`
|
| 128 |
+
- `reports/maniskill_bag_bridge_eval_less_bonus_seed23/public_benchmark_package_summary.json`
|
| 129 |
+
- `trunk = 0.48`
|
| 130 |
+
- `noop = 0.08`
|
| 131 |
+
- `active = 0.48`
|
| 132 |
+
- `delta = +0.00`
|
| 133 |
+
- `reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json`
|
| 134 |
+
- `trunk = 0.40`
|
| 135 |
+
- `noop = 0.04`
|
| 136 |
+
- `active = 0.48`
|
| 137 |
+
- `delta = +0.08`
|
| 138 |
+
- run-bootstrap CI `[0.00, 0.16]`
|
| 139 |
+
|
| 140 |
+
## Cloth Retrieval Proxy
|
| 141 |
+
|
| 142 |
+
Benchmark:
|
| 143 |
+
|
| 144 |
+
- ManiSkill public bridge cloth retrieval proxy
|
| 145 |
+
|
| 146 |
+
### Completed held-out seeds
|
| 147 |
+
|
| 148 |
+
- `seed17`
|
| 149 |
+
- `trunk = 0.04`
|
| 150 |
+
- `noop = 0.04`
|
| 151 |
+
- `active = 0.10`
|
| 152 |
+
- `intervention = 0.3369`
|
| 153 |
+
- `non_base = 0.2674`
|
| 154 |
+
- `seed23`
|
| 155 |
+
- `trunk = 0.04`
|
| 156 |
+
- `noop = 0.02`
|
| 157 |
+
- `active = 0.02`
|
| 158 |
+
- `intervention = 0.0`
|
| 159 |
+
- `non_base = 0.0`
|
| 160 |
+
- `seed29`
|
| 161 |
+
- `trunk = 0.04`
|
| 162 |
+
- `noop = 0.04`
|
| 163 |
+
- `active = 0.04`
|
| 164 |
+
- `intervention = 0.0`
|
| 165 |
+
- `non_base = 0.0`
|
| 166 |
+
|
| 167 |
+
3-seed aggregate:
|
| 168 |
+
|
| 169 |
+
- `trunk = 0.0400`
|
| 170 |
+
- `noop = 0.0333`
|
| 171 |
+
- `active = 0.0533`
|
| 172 |
+
- `delta = +0.0133`
|
| 173 |
+
|
| 174 |
+
### Seed-23 cloth validation sweep
|
| 175 |
+
|
| 176 |
+
- `reports/maniskill_cloth_bridge_val_sweep_seed23/summary.json`
|
| 177 |
+
|
| 178 |
+
Configs:
|
| 179 |
+
|
| 180 |
+
- `default`
|
| 181 |
+
- `trunk = 0.25`
|
| 182 |
+
- `noop = 0.125`
|
| 183 |
+
- `active = 0.125`
|
| 184 |
+
- `intervention = 0.0`
|
| 185 |
+
- `low_thresh`
|
| 186 |
+
- `active = 0.125`
|
| 187 |
+
- `intervention = 0.2`
|
| 188 |
+
- `non_base = 0.0667`
|
| 189 |
+
- `low_thresh_less_bonus`
|
| 190 |
+
- `active = 0.125`
|
| 191 |
+
- `intervention = 0.2`
|
| 192 |
+
- `non_base = 0.0667`
|
| 193 |
+
- `very_low_thresh_less_bonus`
|
| 194 |
+
- `active = 0.125`
|
| 195 |
+
- `intervention = 1.0`
|
| 196 |
+
- `non_base = 0.5333`
|
| 197 |
+
|
| 198 |
+
Interpretation:
|
| 199 |
+
|
| 200 |
+
- seed23 cloth was not recoverable by eval-side planner tuning alone
|
| 201 |
+
|
| 202 |
+
## Single-Seed Combined Proxy Suite
|
| 203 |
+
|
| 204 |
+
- `reports/public_proxy_suite_smoke_v1/combined_summary.json`
|
| 205 |
+
|
| 206 |
+
Single-seed summary:
|
| 207 |
+
|
| 208 |
+
- occlusion proxy: `+0.58`
|
| 209 |
+
- bag proxy: `+0.16`
|
| 210 |
+
- cloth proxy: `+0.06`
|
| 211 |
+
- macro delta: `+0.267`
|
| 212 |
+
|
| 213 |
+
This combined single-seed picture is useful historically, but the stronger current read is:
|
| 214 |
+
|
| 215 |
+
- occlusion: strong
|
| 216 |
+
- bag: modestly positive across corrected 2-seed evaluation
|
| 217 |
+
- cloth: weak/inconclusive across 3 seeds
|
code/VLAarchtests4_root/README.md
ADDED
|
@@ -0,0 +1,407 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- robotics
|
| 4 |
+
- vision-language-action
|
| 5 |
+
- bimanual-manipulation
|
| 6 |
+
- maniskill
|
| 7 |
+
- rlbench
|
| 8 |
+
- rgbd
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
# VLAarchtests4
|
| 12 |
+
|
| 13 |
+
`VLAarchtests4` is the fresh organization repo for the RunPod work staged from `/workspace` on `2026-04-01 UTC`.
|
| 14 |
+
|
| 15 |
+
It carries forward the earlier repo lineage and adds the current public-sim benchmark package work:
|
| 16 |
+
|
| 17 |
+
- `VLAarchtests`
|
| 18 |
+
- early proxy + RLBench architecture search, handoff checkpoints, and environment recreation files from the `2026-03-25/26` sessions
|
| 19 |
+
- `VLAarchtests2`
|
| 20 |
+
- larger exploratory organization repo with more baselines, overlap/anchor work, frequent model changes, mixed artifacts, and several results that required later reinterpretation
|
| 21 |
+
- `VLAarchtests3`
|
| 22 |
+
- cleaned export focused on the elastic-occlusion `trunk + structured adapter + no-op fallback` refactor, validated tests, current checkpoints, and handoff docs
|
| 23 |
+
- `VLAarchtests4`
|
| 24 |
+
- keeps the `VLAarchtests3` export intact and adds the full current workspace `reports/`, `outputs/`, and `data/` trees, including all public benchmark smoke runs, checkpoint directories, dataset bundles, validation sweeps, and environment snapshots from the public-sim evaluation pass
|
| 25 |
+
|
| 26 |
+
## What This Repo Adds
|
| 27 |
+
|
| 28 |
+
The main new addition in this repo is the public benchmark track work for the elastic-occlusion adapter:
|
| 29 |
+
|
| 30 |
+
- real public-sim smoke runs on:
|
| 31 |
+
- ManiSkill `PickClutterYCB-v1` as the dense occluded retrieval proxy
|
| 32 |
+
- ManiSkill bridge basket retrieval proxy as the bag retrieval proxy
|
| 33 |
+
- ManiSkill bridge cloth retrieval proxy as the folded-cloth retrieval proxy
|
| 34 |
+
- the public benchmark package code and summaries
|
| 35 |
+
- the train/eval logs, checkpoints, cached datasets, validation sweeps, and correction logs for those runs
|
| 36 |
+
- full visual rerenders of the final `smoke_v5_eval_tuned_softerpref` dense-occlusion benchmark for both `trunk_only_ft` and `adapter_active_ft`
|
| 37 |
+
- the same-machine environment snapshot for the public benchmark stack used on this RunPod
|
| 38 |
+
|
| 39 |
+
## Top-Level Contents
|
| 40 |
+
|
| 41 |
+
- `code/`
|
| 42 |
+
- the cleaned code snapshot inherited from `VLAarchtests3`
|
| 43 |
+
- `artifacts/`
|
| 44 |
+
- prior staged checkpoints, proxy data, reports, and generated configs already bundled by `VLAarchtests3`
|
| 45 |
+
- `docs/`
|
| 46 |
+
- prior handoff/audit docs plus the current public benchmark run logs and correction notes
|
| 47 |
+
- `legacy/`
|
| 48 |
+
- older exact artifacts preserved by `VLAarchtests3`
|
| 49 |
+
- `setup/`
|
| 50 |
+
- prior environment files plus a new public benchmark environment snapshot under `setup/public_benchmark/`
|
| 51 |
+
- `history/`
|
| 52 |
+
- copied README history for `VLAarchtests`, `VLAarchtests2`, and `VLAarchtests3`
|
| 53 |
+
- `reports/`
|
| 54 |
+
- the full current `/workspace/workspace/reports` tree from this machine
|
| 55 |
+
- `outputs/`
|
| 56 |
+
- the full current `/workspace/workspace/outputs` tree from this machine
|
| 57 |
+
- `data/`
|
| 58 |
+
- the full current `/workspace/workspace/data` tree from this machine
|
| 59 |
+
- `PUBLIC_BENCHMARK_RESULTS.md`
|
| 60 |
+
- compact index of all public benchmark train/eval results from this session
|
| 61 |
+
- `MODEL_AND_ARTIFACT_INDEX.md`
|
| 62 |
+
- practical map of the main artifact roots to start from
|
| 63 |
+
|
| 64 |
+
## Benchmark GIF Renders
|
| 65 |
+
|
| 66 |
+
The repo now also includes a full rendered replay of the final dense-occlusion benchmark:
|
| 67 |
+
|
| 68 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref_gifs/`
|
| 69 |
+
- `50` held-out `trunk_only_ft` gifs
|
| 70 |
+
- `50` held-out `adapter_active_ft` gifs
|
| 71 |
+
- `index.html`, `INDEX.md`, and `manifest.json` for browsing and validation
|
| 72 |
+
- renderer:
|
| 73 |
+
- `code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/render_maniskill_pickclutter_benchmark_gifs.py`
|
| 74 |
+
|
| 75 |
+
Important caveats:
|
| 76 |
+
|
| 77 |
+
- these gifs are rerendered from the saved `smoke_v5_eval_tuned_softerpref` checkpoints and exact held-out seeds, not a different benchmark run
|
| 78 |
+
- the rerender kept the same `softer_pref` planner override used in the reported held-out result
|
| 79 |
+
- the rerender manifest records `0` success mismatches versus the saved benchmark json files
|
| 80 |
+
- only the dense-occlusion track has this full gif export right now
|
| 81 |
+
|
| 82 |
+
## Architecture State Carried Forward
|
| 83 |
+
|
| 84 |
+
The core model family inherited from `VLAarchtests3` is still:
|
| 85 |
+
|
| 86 |
+
- `trunk_only`
|
| 87 |
+
- `adapter_noop`
|
| 88 |
+
- `adapter_active`
|
| 89 |
+
|
| 90 |
+
The important architectural state carried into the public benchmark work is:
|
| 91 |
+
|
| 92 |
+
- wrapped-policy interface with exact `trunk_only`, `adapter_noop`, and `adapter_active` modes
|
| 93 |
+
- structured reveal/retrieve adapter with:
|
| 94 |
+
- state prediction
|
| 95 |
+
- task-routed proposal families
|
| 96 |
+
- retrieve-feasibility gating
|
| 97 |
+
- lightweight transition model
|
| 98 |
+
- planner/reranker
|
| 99 |
+
- planner fixes that replaced hard vetoes with softer stage penalties in:
|
| 100 |
+
- `code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py`
|
| 101 |
+
|
| 102 |
+
## Public Benchmark Summary
|
| 103 |
+
|
| 104 |
+
Detailed per-run results are in `PUBLIC_BENCHMARK_RESULTS.md`. The short version is:
|
| 105 |
+
|
| 106 |
+
### 1. Dense occluded retrieval proxy
|
| 107 |
+
|
| 108 |
+
Benchmark:
|
| 109 |
+
|
| 110 |
+
- ManiSkill `PickClutterYCB-v1`
|
| 111 |
+
|
| 112 |
+
Best current held-out result:
|
| 113 |
+
|
| 114 |
+
- directory:
|
| 115 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/`
|
| 116 |
+
- summary:
|
| 117 |
+
- `trunk_only_ft = 0.04`
|
| 118 |
+
- `adapter_noop = 0.04`
|
| 119 |
+
- `adapter_active_ft = 0.62`
|
| 120 |
+
- `delta_active_vs_trunk = +0.58`
|
| 121 |
+
- `95% CI = [0.44, 0.72]`
|
| 122 |
+
- `intervention_rate = 1.0`
|
| 123 |
+
- `non_base_selection_rate = 1.0`
|
| 124 |
+
|
| 125 |
+
Important caveat:
|
| 126 |
+
|
| 127 |
+
- this was not a new retrain after `smoke_v5`
|
| 128 |
+
- it used the same `smoke_v5` checkpoints with planner hyperparameters selected on the frozen validation split and then applied once to the untouched held-out split
|
| 129 |
+
|
| 130 |
+
### 2. Bag retrieval proxy
|
| 131 |
+
|
| 132 |
+
Benchmark:
|
| 133 |
+
|
| 134 |
+
- public ManiSkill bridge basket retrieval proxy
|
| 135 |
+
|
| 136 |
+
Current fair read:
|
| 137 |
+
|
| 138 |
+
- seed `17` corrected held-out:
|
| 139 |
+
- `trunk = 0.32`
|
| 140 |
+
- `noop = 0.00`
|
| 141 |
+
- `active = 0.48`
|
| 142 |
+
- seed `23` corrected held-out:
|
| 143 |
+
- `trunk = 0.48`
|
| 144 |
+
- `noop = 0.08`
|
| 145 |
+
- `active = 0.48`
|
| 146 |
+
- corrected 2-seed aggregate:
|
| 147 |
+
- `trunk = 0.40`
|
| 148 |
+
- `noop = 0.04`
|
| 149 |
+
- `active = 0.48`
|
| 150 |
+
- `delta = +0.08`
|
| 151 |
+
|
| 152 |
+
Interpretation:
|
| 153 |
+
|
| 154 |
+
- bag remains modestly positive after using one consistent corrected planner across seeds
|
| 155 |
+
- the effect is smaller and less clean than the best occlusion result
|
| 156 |
+
|
| 157 |
+
### 3. Cloth retrieval proxy
|
| 158 |
+
|
| 159 |
+
Benchmark:
|
| 160 |
+
|
| 161 |
+
- public ManiSkill bridge cloth retrieval proxy
|
| 162 |
+
|
| 163 |
+
Current read:
|
| 164 |
+
|
| 165 |
+
- seed `17`:
|
| 166 |
+
- `trunk = 0.04`
|
| 167 |
+
- `noop = 0.04`
|
| 168 |
+
- `active = 0.10`
|
| 169 |
+
- seed `23`:
|
| 170 |
+
- `trunk = 0.04`
|
| 171 |
+
- `noop = 0.02`
|
| 172 |
+
- `active = 0.02`
|
| 173 |
+
- seed `29`:
|
| 174 |
+
- `trunk = 0.04`
|
| 175 |
+
- `noop = 0.04`
|
| 176 |
+
- `active = 0.04`
|
| 177 |
+
- 3-seed aggregate:
|
| 178 |
+
- `trunk = 0.0400`
|
| 179 |
+
- `noop = 0.0333`
|
| 180 |
+
- `active = 0.0533`
|
| 181 |
+
- `delta = +0.0133`
|
| 182 |
+
|
| 183 |
+
Interpretation:
|
| 184 |
+
|
| 185 |
+
- cloth is weak and unstable
|
| 186 |
+
- current evidence does not support a strong cloth-specific win
|
| 187 |
+
|
| 188 |
+
## Important Fairness Notes
|
| 189 |
+
|
| 190 |
+
The fairness story is mixed and should be stated plainly.
|
| 191 |
+
|
| 192 |
+
What is fair in the strongest public benchmark result:
|
| 193 |
+
|
| 194 |
+
- same initialization checkpoint for `trunk_only_ft` and `adapter_active_ft`
|
| 195 |
+
- same train/val/test split within each task
|
| 196 |
+
- same optimizer, LR, batch size, and unfreeze scope within each task
|
| 197 |
+
- `adapter_noop` is evaluated from the same adapter checkpoint as `adapter_active_ft`
|
| 198 |
+
- the held-out test episodes were not hand-picked after seeing outcomes
|
| 199 |
+
|
| 200 |
+
What is not fully paper-clean yet:
|
| 201 |
+
|
| 202 |
+
- most current public benchmark evidence is smoke-scale and low-seed
|
| 203 |
+
- the occlusion headline result depends on validation-selected planner tuning on top of a fixed checkpoint
|
| 204 |
+
- bag required eval-side planner correction for one seed to avoid a collapse
|
| 205 |
+
- cloth remains weak even after additional seeds and val sweeps
|
| 206 |
+
|
| 207 |
+
### PickClutter Split Fairness
|
| 208 |
+
|
| 209 |
+
The important point for the dense-occlusion track is that the dataset split did not drift across the early smoke versions.
|
| 210 |
+
|
| 211 |
+
- `data/maniskill_pickclutter/smoke_v1/episode_splits.json`
|
| 212 |
+
- `data/maniskill_pickclutter/smoke_v2/episode_splits.json`
|
| 213 |
+
- `data/maniskill_pickclutter/smoke_v3/episode_splits.json`
|
| 214 |
+
|
| 215 |
+
These files contain the same episode ids:
|
| 216 |
+
|
| 217 |
+
- train: `170000..170031`
|
| 218 |
+
- val: `171000..171007`
|
| 219 |
+
- eval: `172000..172049`
|
| 220 |
+
|
| 221 |
+
Also:
|
| 222 |
+
|
| 223 |
+
- there is no `data/maniskill_pickclutter/smoke_v4/`
|
| 224 |
+
- there is no `data/maniskill_pickclutter/smoke_v5/`
|
| 225 |
+
|
| 226 |
+
`smoke_v4` and `smoke_v5` were code/report version labels, not new held-out episode bundles.
|
| 227 |
+
|
| 228 |
+
### What Changed Across PickClutter Versions
|
| 229 |
+
|
| 230 |
+
The big changes across `smoke_v2`, `smoke_v3`, `smoke_v4`, and `smoke_v5` were:
|
| 231 |
+
|
| 232 |
+
- more benchmark-derived state supervision
|
| 233 |
+
- transition-model training enablement
|
| 234 |
+
- planner bug fixes
|
| 235 |
+
- fairness fixes so the adapter checkpoint did not hide a stronger shared trunk
|
| 236 |
+
- then a frozen-validation planner sweep for the final held-out eval
|
| 237 |
+
|
| 238 |
+
The big occlusion win was not caused by changing the eval episodes.
|
| 239 |
+
|
| 240 |
+
### Dense-Occlusion Render Artifacts
|
| 241 |
+
|
| 242 |
+
The final dense-occlusion run also has a full visual export in:
|
| 243 |
+
|
| 244 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref_gifs/`
|
| 245 |
+
|
| 246 |
+
Those gifs show the robot interacting with the 3D scene and overlay the adapter state per frame. For `adapter_active_ft`, the overlay includes:
|
| 247 |
+
|
| 248 |
+
- adapter on/off state
|
| 249 |
+
- whether a non-base proposal was selected
|
| 250 |
+
- candidate index
|
| 251 |
+
- planner name
|
| 252 |
+
- planner score/confidence
|
| 253 |
+
- state signals such as visibility, access, gap, and damage
|
| 254 |
+
|
| 255 |
+
## Crucial Caveats
|
| 256 |
+
|
| 257 |
+
### Occlusion result was planner-tuned
|
| 258 |
+
|
| 259 |
+
The large jump in:
|
| 260 |
+
|
| 261 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/`
|
| 262 |
+
|
| 263 |
+
came from validation-selected planner tuning on top of the same `smoke_v5` checkpoint.
|
| 264 |
+
|
| 265 |
+
The selected override values were:
|
| 266 |
+
|
| 267 |
+
- `mode_preference_bonus = 0.75`
|
| 268 |
+
- `premature_retrieve_penalty = 0.5`
|
| 269 |
+
- `premature_insert_penalty = 0.25`
|
| 270 |
+
- `premature_maintain_penalty = 1.0`
|
| 271 |
+
- `occlusion_maintain_gap_min_access = 0.30`
|
| 272 |
+
- `occlusion_maintain_gap_min_visibility = 0.20`
|
| 273 |
+
- `retrieve_stage_access_threshold = 0.18`
|
| 274 |
+
- `retrieve_stage_reveal_threshold = 0.18`
|
| 275 |
+
- `retrieve_stage_support_threshold = 0.18`
|
| 276 |
+
|
| 277 |
+
That was a validation-only selection step. It was not a fresh retrain.
|
| 278 |
+
|
| 279 |
+
### Bag and cloth did not use real depth
|
| 280 |
+
|
| 281 |
+
The bridge-task runner for the bag and cloth proxies used:
|
| 282 |
+
|
| 283 |
+
- one real RGB camera
|
| 284 |
+
- copied into all camera slots
|
| 285 |
+
- zero-filled depth channels
|
| 286 |
+
|
| 287 |
+
The runner labels this stack:
|
| 288 |
+
|
| 289 |
+
- `rgb_triplicate_zero_depth`
|
| 290 |
+
|
| 291 |
+
This is a real limitation and it should not be hidden.
|
| 292 |
+
|
| 293 |
+
It happened because the bridge proxy runner used a compatibility shim to satisfy the shared multi-camera tensor interface without plumbing real bridge-scene multiview depth through the stack.
|
| 294 |
+
|
| 295 |
+
Consequences:
|
| 296 |
+
|
| 297 |
+
- bag and cloth are not modality-matched to the PickClutter runs
|
| 298 |
+
- PickClutter used real `rgbd_3cam`
|
| 299 |
+
- bag and cloth used weaker perception input
|
| 300 |
+
|
| 301 |
+
### Bag and cloth also used a different control wrapper
|
| 302 |
+
|
| 303 |
+
PickClutter:
|
| 304 |
+
|
| 305 |
+
- observation stack: `rgbd_3cam`
|
| 306 |
+
- action space: `bimanual_delta_pose`
|
| 307 |
+
|
| 308 |
+
Bag and cloth:
|
| 309 |
+
|
| 310 |
+
- observation stack: `rgb_triplicate_zero_depth`
|
| 311 |
+
- action space: `widowx_delta_pose`
|
| 312 |
+
|
| 313 |
+
So the cross-track story is architecture-consistent but not fully input/control-identical.
|
| 314 |
+
|
| 315 |
+
### `smoke_v4_evalprobe_fromv3` is not a clean retrain result
|
| 316 |
+
|
| 317 |
+
This run:
|
| 318 |
+
|
| 319 |
+
- `reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/`
|
| 320 |
+
|
| 321 |
+
used corrected planner logic on top of `smoke_v3` weights. It is useful evidence that the active adapter can matter, but it is not a clean end-to-end retrain.
|
| 322 |
+
|
| 323 |
+
## What Was Actually Learned
|
| 324 |
+
|
| 325 |
+
The current repo supports the following claims:
|
| 326 |
+
|
| 327 |
+
- the structured adapter is still alive
|
| 328 |
+
- the active branch can clearly matter on a real public dense-occlusion benchmark proxy
|
| 329 |
+
- `adapter_noop` remains a useful fairness control
|
| 330 |
+
- bag-like retrieval still shows modest positive evidence
|
| 331 |
+
- cloth-like retrieval is currently the weak link
|
| 332 |
+
|
| 333 |
+
It does not support the following stronger claims yet:
|
| 334 |
+
|
| 335 |
+
- broad superiority on realistic manipulation benchmarks
|
| 336 |
+
- stable multi-seed wins across all three target-like public proxy tracks
|
| 337 |
+
- a clean modality-matched comparison across occlusion, bag, and cloth
|
| 338 |
+
|
| 339 |
+
## Environment And Setup
|
| 340 |
+
|
| 341 |
+
Two environment stories exist in this repo.
|
| 342 |
+
|
| 343 |
+
### Prior `VLAarchtests3` / RLBench stack
|
| 344 |
+
|
| 345 |
+
Preserved under:
|
| 346 |
+
|
| 347 |
+
- `setup/ENVIRONMENT.md`
|
| 348 |
+
- `setup/env_vars.sh`
|
| 349 |
+
- `setup/rlbench_pip_freeze.txt`
|
| 350 |
+
|
| 351 |
+
This is the older RLBench / AnyBimanual oriented environment.
|
| 352 |
+
|
| 353 |
+
### Current public benchmark stack
|
| 354 |
+
|
| 355 |
+
Preserved under:
|
| 356 |
+
|
| 357 |
+
- `setup/public_benchmark/ENVIRONMENT.md`
|
| 358 |
+
- `setup/public_benchmark/env_vars.sh`
|
| 359 |
+
- `setup/public_benchmark/python_version.txt`
|
| 360 |
+
- `setup/public_benchmark/uname.txt`
|
| 361 |
+
- `setup/public_benchmark/nvidia_smi.txt`
|
| 362 |
+
- `setup/public_benchmark/gpu_short.txt`
|
| 363 |
+
- `setup/public_benchmark/pip_freeze_python311.txt`
|
| 364 |
+
- `setup/public_benchmark/rlbench_env_pip_freeze.txt`
|
| 365 |
+
- `setup/public_benchmark/hf_env.txt`
|
| 366 |
+
|
| 367 |
+
The public benchmark runs in this session were assembled on:
|
| 368 |
+
|
| 369 |
+
- GPU: `NVIDIA L40S`
|
| 370 |
+
- VRAM: `46068 MiB`
|
| 371 |
+
- driver: `580.126.09`
|
| 372 |
+
- Python: `3.11.10`
|
| 373 |
+
- kernel: `Linux 6.8.0-88-generic`
|
| 374 |
+
|
| 375 |
+
## Recommended Starting Points
|
| 376 |
+
|
| 377 |
+
If you want the strongest current public benchmark evidence, start here:
|
| 378 |
+
|
| 379 |
+
- `docs/maniskill_pickclutter_correction_log_2026-04-01.md`
|
| 380 |
+
- `reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
|
| 381 |
+
|
| 382 |
+
If you want the bag/cloth public bridge follow-up, start here:
|
| 383 |
+
|
| 384 |
+
- `docs/public_bridge_smoke_run_log_2026-04-01.md`
|
| 385 |
+
- `reports/maniskill_bag_bridge_eval_less_bonus_2seed_manual_summary.json`
|
| 386 |
+
- `reports/maniskill_cloth_bridge_val_sweep_seed23/summary.json`
|
| 387 |
+
|
| 388 |
+
If you want the repo lineage context, start here:
|
| 389 |
+
|
| 390 |
+
- `history/VLAarchtests_previous_README.md`
|
| 391 |
+
- `history/VLAarchtests2_previous_README.md`
|
| 392 |
+
- `history/VLAarchtests3_previous_README.md`
|
| 393 |
+
|
| 394 |
+
## Bottom Line
|
| 395 |
+
|
| 396 |
+
This repo is the complete organization package for the current workspace state.
|
| 397 |
+
|
| 398 |
+
It includes:
|
| 399 |
+
|
| 400 |
+
- the `VLAarchtests3` export base
|
| 401 |
+
- the full current machine `reports/`, `outputs/`, and `data/` trees
|
| 402 |
+
- the public benchmark code, datasets, checkpoints, and results
|
| 403 |
+
- the environment files needed to stand up the same stack on similar hardware
|
| 404 |
+
|
| 405 |
+
Use it as the archival handoff state for continuing the elastic-occlusion adapter work.
|
| 406 |
+
|
| 407 |
+
Do not cite it as if all three target-like public proxy tracks are already cleanly solved. The occlusion track is the strongest current evidence; bag is modest; cloth remains weak; and the bridge-task perception stack still needs a proper real-depth rewrite.
|
code/VLAarchtests4_root/docs/maniskill_pickclutter_correction_log_2026-04-01.md
ADDED
|
@@ -0,0 +1,187 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ManiSkill PickClutter Correction Log (2026-04-01)
|
| 2 |
+
|
| 3 |
+
## Scope
|
| 4 |
+
|
| 5 |
+
Public benchmark:
|
| 6 |
+
|
| 7 |
+
- ManiSkill 3 `PickClutterYCB-v1`
|
| 8 |
+
|
| 9 |
+
Frozen public split reused across all runs:
|
| 10 |
+
|
| 11 |
+
- train demos: `32` episodes
|
| 12 |
+
- val demos: `8` episodes
|
| 13 |
+
- eval episodes: `50`
|
| 14 |
+
- seed: `17`
|
| 15 |
+
- data bundle: `/workspace/workspace/data/maniskill_pickclutter/smoke_v3`
|
| 16 |
+
|
| 17 |
+
Fair comparison modes:
|
| 18 |
+
|
| 19 |
+
- `trunk_only_ft`
|
| 20 |
+
- `adapter_noop`
|
| 21 |
+
- `adapter_active_ft`
|
| 22 |
+
|
| 23 |
+
## Code Changes
|
| 24 |
+
|
| 25 |
+
Runner changes:
|
| 26 |
+
|
| 27 |
+
- enabled candidate rollout supervision from real ManiSkill states
|
| 28 |
+
- enabled adapter transition-model training/eval
|
| 29 |
+
- unfroze `adapter.transition_model`
|
| 30 |
+
- set non-zero transition loss weight
|
| 31 |
+
- added ManiSkill smoke planner overrides for the occlusion proxy:
|
| 32 |
+
- `adapter_confidence_threshold=0.50`
|
| 33 |
+
- `retrieve_access_threshold=0.08`
|
| 34 |
+
- `retrieve_persistence_threshold=0.12`
|
| 35 |
+
- `retrieve_support_threshold=0.08`
|
| 36 |
+
- `retrieve_reocclusion_threshold=0.92`
|
| 37 |
+
|
| 38 |
+
Planner correction:
|
| 39 |
+
|
| 40 |
+
- changed adapter stage rules from hard vetoes to soft penalties in
|
| 41 |
+
`/workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/models/planner.py`
|
| 42 |
+
|
| 43 |
+
## Runs
|
| 44 |
+
|
| 45 |
+
### 1. `smoke_v3` corrected-train baseline
|
| 46 |
+
|
| 47 |
+
Artifacts:
|
| 48 |
+
|
| 49 |
+
- summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v3/public_benchmark_package_summary.json`
|
| 50 |
+
|
| 51 |
+
Result:
|
| 52 |
+
|
| 53 |
+
- `trunk_only_ft=0.06`
|
| 54 |
+
- `adapter_noop=0.06`
|
| 55 |
+
- `adapter_active_ft=0.06`
|
| 56 |
+
- `intervention_rate=0.0`
|
| 57 |
+
- `non_base_selection_rate=0.0`
|
| 58 |
+
|
| 59 |
+
Interpretation:
|
| 60 |
+
|
| 61 |
+
- rollout supervision and transition-model training alone were not enough
|
| 62 |
+
- the adapter remained inert
|
| 63 |
+
|
| 64 |
+
### 2. `smoke_v4_evalprobe_fromv3` corrected-planner eval on `smoke_v3` weights
|
| 65 |
+
|
| 66 |
+
Artifacts:
|
| 67 |
+
|
| 68 |
+
- summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v4_evalprobe_fromv3/public_benchmark_package_summary.json`
|
| 69 |
+
|
| 70 |
+
Result:
|
| 71 |
+
|
| 72 |
+
- `trunk_only_ft=0.06`
|
| 73 |
+
- `adapter_noop=0.06`
|
| 74 |
+
- `adapter_active_ft=0.62`
|
| 75 |
+
- `delta_active_vs_trunk=+0.56`
|
| 76 |
+
- `95% CI=[+0.40, +0.70]`
|
| 77 |
+
- `intervention_rate=1.0`
|
| 78 |
+
- `non_base_selection_rate=1.0`
|
| 79 |
+
|
| 80 |
+
Interpretation:
|
| 81 |
+
|
| 82 |
+
- this is the first real adapter-specific sign of life on the public benchmark
|
| 83 |
+
- the corrected planner logic is doing the work
|
| 84 |
+
- the improvement is not coming from the shared trunk, because `adapter_noop` stayed at `0.06`
|
| 85 |
+
|
| 86 |
+
### 3. `smoke_v4` clean retrain with corrected planner active during train and eval
|
| 87 |
+
|
| 88 |
+
Artifacts:
|
| 89 |
+
|
| 90 |
+
- summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v4/public_benchmark_package_summary.json`
|
| 91 |
+
|
| 92 |
+
Result:
|
| 93 |
+
|
| 94 |
+
- `trunk_only_ft=0.48`
|
| 95 |
+
- `adapter_noop=0.04`
|
| 96 |
+
- `adapter_active_ft=0.04`
|
| 97 |
+
- `intervention_rate=1.0`
|
| 98 |
+
- `non_base_selection_rate=1.0`
|
| 99 |
+
- `delta_active_vs_trunk=-0.44`
|
| 100 |
+
|
| 101 |
+
Interpretation:
|
| 102 |
+
|
| 103 |
+
- the clean retrain under corrected planner logic is unstable / regressive
|
| 104 |
+
- the adapter-trained checkpoint collapsed even though active mode intervened
|
| 105 |
+
- current evidence supports the corrected planner as a real eval-time model fix, but not yet as a stable retrain recipe
|
| 106 |
+
|
| 107 |
+
### 4. `smoke_v5` fair retrain with trunk-action supervision preserved inside adapter training
|
| 108 |
+
|
| 109 |
+
Artifacts:
|
| 110 |
+
|
| 111 |
+
- summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5/public_benchmark_package_summary.json`
|
| 112 |
+
|
| 113 |
+
Result:
|
| 114 |
+
|
| 115 |
+
- `trunk_only_ft=0.04`
|
| 116 |
+
- `adapter_noop=0.04`
|
| 117 |
+
- `adapter_active_ft=0.04`
|
| 118 |
+
- `intervention_rate=1.0`
|
| 119 |
+
- `non_base_selection_rate=1.0`
|
| 120 |
+
- `delta_active_vs_trunk=0.00`
|
| 121 |
+
|
| 122 |
+
Interpretation:
|
| 123 |
+
|
| 124 |
+
- this fixed the fairness problem from `smoke_v4`: the adapter-trained checkpoint no longer hid a stronger trunk, because `adapter_noop` matched `trunk_only_ft`
|
| 125 |
+
- but the active branch still failed because the planner collapsed to `maintain_gap` on every decision
|
| 126 |
+
|
| 127 |
+
### 5. `smoke_v5_val_sweep` and held-out `smoke_v5_eval_tuned_softerpref`
|
| 128 |
+
|
| 129 |
+
Artifacts:
|
| 130 |
+
|
| 131 |
+
- val sweep: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_val_sweep/summary.json`
|
| 132 |
+
- held-out summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
|
| 133 |
+
|
| 134 |
+
Val-selected planner override:
|
| 135 |
+
|
| 136 |
+
- `mode_preference_bonus=0.75`
|
| 137 |
+
- `premature_retrieve_penalty=0.5`
|
| 138 |
+
- `premature_insert_penalty=0.25`
|
| 139 |
+
- `premature_maintain_penalty=1.0`
|
| 140 |
+
- `occlusion_maintain_gap_min_access=0.30`
|
| 141 |
+
- `occlusion_maintain_gap_min_visibility=0.20`
|
| 142 |
+
- `retrieve_stage_access_threshold=0.18`
|
| 143 |
+
- `retrieve_stage_reveal_threshold=0.18`
|
| 144 |
+
- `retrieve_stage_support_threshold=0.18`
|
| 145 |
+
|
| 146 |
+
Validation result:
|
| 147 |
+
|
| 148 |
+
- `baseline_corrected=0.00`
|
| 149 |
+
- `soft_pref=0.00`
|
| 150 |
+
- `softer_pref=0.625`
|
| 151 |
+
- `retrieve_open=0.625`
|
| 152 |
+
|
| 153 |
+
Held-out result:
|
| 154 |
+
|
| 155 |
+
- `trunk_only_ft=0.04`
|
| 156 |
+
- `adapter_noop=0.04`
|
| 157 |
+
- `adapter_active_ft=0.62`
|
| 158 |
+
- `delta_active_vs_trunk=+0.58`
|
| 159 |
+
- `95% CI=[+0.44, +0.72]`
|
| 160 |
+
- `intervention_rate=1.0`
|
| 161 |
+
- `non_base_selection_rate=1.0`
|
| 162 |
+
- `steps_to_retrieve=1.0`
|
| 163 |
+
- `signs_of_life=true`
|
| 164 |
+
|
| 165 |
+
Interpretation:
|
| 166 |
+
|
| 167 |
+
- this is a fair held-out public-benchmark win on the dense-occlusion proxy
|
| 168 |
+
- the gain is adapter-specific because `adapter_noop` stayed flat with the trunk baseline
|
| 169 |
+
- the fixed checkpoint from `smoke_v5` was viable; the missing piece was planner-stage calibration on the frozen validation split
|
| 170 |
+
|
| 171 |
+
## Current Best Public-Benchmark Evidence
|
| 172 |
+
|
| 173 |
+
Best adapter-specific evidence currently available:
|
| 174 |
+
|
| 175 |
+
- `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
|
| 176 |
+
|
| 177 |
+
Why this is the strongest result:
|
| 178 |
+
|
| 179 |
+
- same frozen public train/val/eval split
|
| 180 |
+
- same trained trunk baseline and adapter checkpoint
|
| 181 |
+
- planner override selected on the frozen validation split before the held-out eval run
|
| 182 |
+
- `adapter_noop` isolates the shared-trunk effect and stays flat
|
| 183 |
+
- only `adapter_active_ft` improves, so the gain is caused by live adapter intervention
|
| 184 |
+
|
| 185 |
+
## Open Problem
|
| 186 |
+
|
| 187 |
+
The dense-occlusion proxy now has a fair held-out win, but bag-style and cloth-style public proxy tracks are still missing. The next work item is to bring up the next public proxy benchmark instead of re-running more occlusion-only sweeps.
|
code/VLAarchtests4_root/docs/minimum_sign_of_life_maniskill_pickclutter_run_2026-04-01.md
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Minimum Sign-of-Life Runbook: ManiSkill PickClutterYCB-v1
|
| 2 |
+
|
| 3 |
+
Date: 2026-04-01
|
| 4 |
+
|
| 5 |
+
## Goal
|
| 6 |
+
|
| 7 |
+
Run one real public-benchmark smoke on ManiSkill `PickClutterYCB-v1` that fairly compares:
|
| 8 |
+
|
| 9 |
+
- `trunk_only_ft`
|
| 10 |
+
- `adapter_noop`
|
| 11 |
+
- `adapter_active_ft`
|
| 12 |
+
|
| 13 |
+
The target claim for this run is narrow:
|
| 14 |
+
|
| 15 |
+
- on dense occluded retrieval, the adapter should visibly intervene and beat the trunk-only control trained on the same data;
|
| 16 |
+
- this is a minimum-sign-of-life run, not the full 3-track package.
|
| 17 |
+
|
| 18 |
+
## Correction After First Failed Smoke
|
| 19 |
+
|
| 20 |
+
The first smoke run confirmed that the real ManiSkill benchmark path worked, but the adapter stayed inert because the smoke dataset only supervised proposal ranking. The corrected run keeps the same benchmark and fairness contract and restarts both trained variants from the same init on the same frozen split, but adds benchmark-derived current-state supervision for the adapter:
|
| 21 |
+
|
| 22 |
+
- `support_mode`
|
| 23 |
+
- `corridor_feasible`
|
| 24 |
+
- `persistence_horizon`
|
| 25 |
+
- `disturbance_cost`
|
| 26 |
+
- selected task metrics that feed the adapter gate and mode bias
|
| 27 |
+
- `state_confidence_target`
|
| 28 |
+
|
| 29 |
+
This corrected artifact set is versioned as `smoke_v2`.
|
| 30 |
+
|
| 31 |
+
## Fixed Protocol
|
| 32 |
+
|
| 33 |
+
### Benchmark
|
| 34 |
+
|
| 35 |
+
- Public benchmark: ManiSkill `PickClutterYCB-v1`
|
| 36 |
+
- Track id: `occlusion_track`
|
| 37 |
+
- Task family label inside the adapter stack: `foliage`
|
| 38 |
+
- Resolution: `224`
|
| 39 |
+
- One seed for training: `17`
|
| 40 |
+
|
| 41 |
+
### Data
|
| 42 |
+
|
| 43 |
+
- One benchmark-native scripted macro teacher collects the dataset once.
|
| 44 |
+
- The teacher also writes per-step scene-state labels from the same public benchmark state and candidate sweep.
|
| 45 |
+
- Train demos: `32`
|
| 46 |
+
- Val demos: `8`
|
| 47 |
+
- Held-out eval episodes: `50`
|
| 48 |
+
- The exact train, val, and eval episode ids/seeds are frozen once generated and written to disk.
|
| 49 |
+
|
| 50 |
+
### Fairness Constraints
|
| 51 |
+
|
| 52 |
+
- Both trained conditions start from the same initialization checkpoint.
|
| 53 |
+
- Both use the same train/val split.
|
| 54 |
+
- Both use the same held-out eval episodes.
|
| 55 |
+
- Both use the same optimizer, batch size, LR, epoch budget, and random seed.
|
| 56 |
+
- Both fine-tune the same trunk submodules:
|
| 57 |
+
- fusion
|
| 58 |
+
- memory
|
| 59 |
+
- decoder
|
| 60 |
+
- The backbone stays frozen for both conditions.
|
| 61 |
+
- The only architectural difference is the presence of the structured adapter.
|
| 62 |
+
- `candidate0` remains the raw trunk action.
|
| 63 |
+
- `adapter_noop` is eval-only and is produced from the `adapter_active_ft` checkpoint.
|
| 64 |
+
|
| 65 |
+
### Initialization
|
| 66 |
+
|
| 67 |
+
- Shared init checkpoint:
|
| 68 |
+
- `/workspace/workspace/VLAarchtests2/VLAarchtests/artifacts/outputs/r3d_handoff_phase/proxy_interaction_r3d_stage3_clip_rgbd_handoff_compact_phase_seed17/checkpoint_best.pt`
|
| 69 |
+
- Load with shape filtering / `init_strict: false`.
|
| 70 |
+
|
| 71 |
+
### Training Conditions
|
| 72 |
+
|
| 73 |
+
#### `trunk_only_ft`
|
| 74 |
+
|
| 75 |
+
- Policy type: `foundation_trunk`
|
| 76 |
+
- Trainable prefixes:
|
| 77 |
+
- `fusion`
|
| 78 |
+
- `memory`
|
| 79 |
+
- `decoder`
|
| 80 |
+
|
| 81 |
+
#### `adapter_active_ft`
|
| 82 |
+
|
| 83 |
+
- Policy type: `adapter_wrapped`
|
| 84 |
+
- Trainable prefixes:
|
| 85 |
+
- `trunk.fusion`
|
| 86 |
+
- `trunk.memory`
|
| 87 |
+
- `trunk.decoder`
|
| 88 |
+
- `adapter.state_head`
|
| 89 |
+
- `adapter.proposal_prior`
|
| 90 |
+
- `adapter.planner`
|
| 91 |
+
- Transition model: off for the smoke run
|
| 92 |
+
|
| 93 |
+
### Evaluation Conditions
|
| 94 |
+
|
| 95 |
+
- `trunk_only_ft`: trained trunk checkpoint
|
| 96 |
+
- `adapter_noop`: adapter checkpoint in no-op mode
|
| 97 |
+
- `adapter_active_ft`: adapter checkpoint in active mode
|
| 98 |
+
|
| 99 |
+
### Success Criteria
|
| 100 |
+
|
| 101 |
+
- Adapter success exceeds trunk-only success by at least `5` absolute points on the held-out `50` episodes.
|
| 102 |
+
- Adapter intervention rate is at least `15%`.
|
| 103 |
+
- Non-base selection rate is at least `15%`.
|
| 104 |
+
- Results are summarized through the public benchmark package reporter under the `occlusion_track`.
|
| 105 |
+
|
| 106 |
+
## Planned Artifacts
|
| 107 |
+
|
| 108 |
+
### Data
|
| 109 |
+
|
| 110 |
+
- `/workspace/workspace/data/maniskill_pickclutter/smoke_v2/train.pt`
|
| 111 |
+
- `/workspace/workspace/data/maniskill_pickclutter/smoke_v2/val.pt`
|
| 112 |
+
- `/workspace/workspace/data/maniskill_pickclutter/smoke_v2/episode_splits.json`
|
| 113 |
+
|
| 114 |
+
### Train Outputs
|
| 115 |
+
|
| 116 |
+
- `/workspace/workspace/outputs/maniskill_pickclutter_smoke_v2/trunk_only_ft_seed17/`
|
| 117 |
+
- `/workspace/workspace/outputs/maniskill_pickclutter_smoke_v2/adapter_active_ft_seed17/`
|
| 118 |
+
|
| 119 |
+
### Eval Outputs
|
| 120 |
+
|
| 121 |
+
- `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/trunk_only_ft_seed17.json`
|
| 122 |
+
- `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/adapter_noop_seed17.json`
|
| 123 |
+
- `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/adapter_active_ft_seed17.json`
|
| 124 |
+
|
| 125 |
+
### Package Summary
|
| 126 |
+
|
| 127 |
+
- `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/public_benchmark_package_summary.json`
|
| 128 |
+
- `/workspace/workspace/reports/maniskill_pickclutter_smoke_v2/public_benchmark_package_summary.md`
|
| 129 |
+
|
| 130 |
+
## Notes
|
| 131 |
+
|
| 132 |
+
- This run is intentionally limited to the fastest credible public target track.
|
| 133 |
+
- No custom benchmark or custom teleop suite is allowed for this smoke.
|
| 134 |
+
- If the ManiSkill runtime or macro controller requires repairs, the repairs must preserve the fairness constraints above.
|
code/VLAarchtests4_root/docs/public_benchmark_progress_2026-04-01.md
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
## Public Benchmark Progress
|
| 2 |
+
|
| 3 |
+
Date: 2026-04-01 UTC
|
| 4 |
+
|
| 5 |
+
### Confirmed Real Public Benchmark Result
|
| 6 |
+
|
| 7 |
+
- Public occlusion proxy: `ManiSkill PickClutterYCB-v1`
|
| 8 |
+
- Strongest adapter-specific result so far:
|
| 9 |
+
- summary: `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
|
| 10 |
+
- `trunk_only_ft = 0.04`
|
| 11 |
+
- `adapter_noop = 0.04`
|
| 12 |
+
- `adapter_active_ft = 0.62`
|
| 13 |
+
- `delta_active_vs_trunk = +0.58`
|
| 14 |
+
- `95% CI = [+0.44, +0.72]`
|
| 15 |
+
- `intervention_rate = 1.0`
|
| 16 |
+
- `non_base_selection_rate = 1.0`
|
| 17 |
+
- Interpretation:
|
| 18 |
+
- this is real adapter-specific sign of life on a public occlusion benchmark
|
| 19 |
+
- the gain is not coming from a stronger shared trunk, because `adapter_noop` stays flat
|
| 20 |
+
|
| 21 |
+
### BEHAVIOR Bag Proxy Investigation
|
| 22 |
+
|
| 23 |
+
Target public task family:
|
| 24 |
+
- official BEHAVIOR grocery-store bag/container retrieval proxy
|
| 25 |
+
- primary candidate: `paying_for_purchases`
|
| 26 |
+
- stricter but currently unusable candidate: `buy_basic_garden_tools`
|
| 27 |
+
|
| 28 |
+
Environment used:
|
| 29 |
+
- BEHAVIOR assets: `/workspace/workspace/BEHAVIOR-1K`
|
| 30 |
+
- venv used for probes: `/workspace/envs/behavior`
|
| 31 |
+
|
| 32 |
+
Findings:
|
| 33 |
+
- `buy_basic_garden_tools` is blocked by official scene-task geometry:
|
| 34 |
+
- repeated failure on `ontop ['rake.n.03_1', 'grocery_shelf.n.01_1']`
|
| 35 |
+
- even with whitelist attempts, the sampler never found a valid shelf placement
|
| 36 |
+
- `paying_for_purchases` is much healthier:
|
| 37 |
+
- `grocery_store_convenience`, `grocery_store_cafe`, and `grocery_store_asian` all load
|
| 38 |
+
- object scope binds the real task objects:
|
| 39 |
+
- `shopping_basket.n.01_1`
|
| 40 |
+
- `money.n.01_1`
|
| 41 |
+
- `checkout.n.03_1`
|
| 42 |
+
- `floor.n.01_1`
|
| 43 |
+
- Root sampler bug:
|
| 44 |
+
- official online sampling fails on the floor / agent chain
|
| 45 |
+
- without patching, the blocking warning is:
|
| 46 |
+
- `Room type [grocery_store] ... floor.n.01_1: , checkout.n.03_1: grocery_store_0`
|
| 47 |
+
- after removing the agent-on-floor condition from the sampler pipeline, the next blocker is:
|
| 48 |
+
- `ontop ['shopping_basket.n.01_1', 'floor.n.01_1'] False`
|
| 49 |
+
- Critical state-probe result:
|
| 50 |
+
- even when object bindings exist, the sampled movable objects remain parked at their far-away import positions
|
| 51 |
+
- observed example on `grocery_store_asian`:
|
| 52 |
+
- basket position near `[120, 120, -80]`
|
| 53 |
+
- money position near `[115, 115, -85]`
|
| 54 |
+
- apples position near `[110, 110, -90]` and `[105, 105, -95]`
|
| 55 |
+
- `money inside basket = False`
|
| 56 |
+
- `apple1 inside basket = False`
|
| 57 |
+
- `apple2 inside basket = False`
|
| 58 |
+
- Conclusion:
|
| 59 |
+
- as of 2026-04-01, the BEHAVIOR bag proxy is not yet a usable fair evaluation track in this workspace
|
| 60 |
+
- the public task objects bind, but the online sampler does not materialize a valid initial scene for training or evaluation
|
| 61 |
+
|
| 62 |
+
### Garment / Cloth Proxy Status
|
| 63 |
+
|
| 64 |
+
- GarmentLab repo cloned:
|
| 65 |
+
- `/workspace/workspace/GarmentLab`
|
| 66 |
+
- Immediate constraint:
|
| 67 |
+
- the repo expects Isaac Sim 4.0.0 plus external Google Drive assets
|
| 68 |
+
- Current status:
|
| 69 |
+
- code inspected only
|
| 70 |
+
- no runnable public cloth benchmark execution completed yet in this workspace
|
| 71 |
+
|
| 72 |
+
### Next Public Proxy Candidates
|
| 73 |
+
|
| 74 |
+
Given the BEHAVIOR blocker, the next-lightest public candidates already available locally are:
|
| 75 |
+
|
| 76 |
+
- `OpenCabinetDrawer-v1`
|
| 77 |
+
- public ManiSkill task
|
| 78 |
+
- good container reveal / access proxy
|
| 79 |
+
- `PutEggplantInBasketScene-v1`
|
| 80 |
+
- public ManiSkill bridge-dataset task
|
| 81 |
+
- public basket / container interaction proxy
|
| 82 |
+
- `PutSpoonOnTableClothInScene-v1`
|
| 83 |
+
- public ManiSkill bridge-dataset cloth interaction proxy
|
| 84 |
+
|
| 85 |
+
### Immediate Recommendation
|
| 86 |
+
|
| 87 |
+
- Keep the confirmed `PickClutterYCB-v1` result as the anchor public success case.
|
| 88 |
+
- Do not spend more time on BEHAVIOR online sampling until either:
|
| 89 |
+
- a cached valid scene instance is created, or
|
| 90 |
+
- the sampler is patched deeply enough to place container objects correctly instead of leaving them at far-away import positions.
|
| 91 |
+
- Pivot the next train/eval smoke to a lighter public ManiSkill proxy before returning to BEHAVIOR.
|
code/VLAarchtests4_root/docs/public_bridge_smoke_run_log_2026-04-01.md
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Public Bridge Smoke Run Log
|
| 2 |
+
|
| 3 |
+
Date: 2026-04-01 UTC
|
| 4 |
+
|
| 5 |
+
## Completed public proxy evidence
|
| 6 |
+
|
| 7 |
+
- Occlusion proxy already completed earlier on `PickClutterYCB-v1`.
|
| 8 |
+
- Best current occlusion report:
|
| 9 |
+
- `/workspace/workspace/reports/maniskill_pickclutter_smoke_v5_eval_tuned_softerpref/public_benchmark_package_summary.json`
|
| 10 |
+
- `trunk_only_ft=0.04`
|
| 11 |
+
- `adapter_noop=0.04`
|
| 12 |
+
- `adapter_active_ft=0.62`
|
| 13 |
+
- `delta_active_vs_trunk=+0.58`
|
| 14 |
+
- `95% CI = [+0.44, +0.72]`
|
| 15 |
+
- `intervention_rate=1.0`
|
| 16 |
+
- `non_base_selection_rate=1.0`
|
| 17 |
+
|
| 18 |
+
- Bag proxy completed on the public ManiSkill bridge basket scene proxy.
|
| 19 |
+
- Bag report directory:
|
| 20 |
+
- `/workspace/workspace/reports/maniskill_bag_bridge_smoke_v1`
|
| 21 |
+
- Bag result summary:
|
| 22 |
+
- `trunk_only_ft=0.32`
|
| 23 |
+
- `adapter_noop=0.00`
|
| 24 |
+
- `adapter_active_ft=0.48`
|
| 25 |
+
- `delta_active_vs_trunk=+0.16`
|
| 26 |
+
- `delta_active_vs_trunk_ci95=[-0.04, 0.34]`
|
| 27 |
+
- `intervention_rate=1.0`
|
| 28 |
+
- `non_base_selection_rate=1.0`
|
| 29 |
+
- bag track `signs_of_life=true`
|
| 30 |
+
- package-level headline gate remains false at this single-seed smoke scale
|
| 31 |
+
|
| 32 |
+
## Cloth proxy definition
|
| 33 |
+
|
| 34 |
+
- Public scene proxy:
|
| 35 |
+
- `PutSpoonOnTableClothInScene-v1`
|
| 36 |
+
- Fixed hidden-state initialization:
|
| 37 |
+
- spoon pose `[-0.235, -0.094, 0.8748]`
|
| 38 |
+
- cloth pose `[-0.235, -0.075, 0.885]`
|
| 39 |
+
- Deterministic valid-seed filter:
|
| 40 |
+
- accept only seeds whose initialized hidden state is below the visibility gate and solvable by scripted reveal+retrieve
|
| 41 |
+
- Reveal macros corrected to push-style actions:
|
| 42 |
+
- `lift_edge` = front push in `+y`
|
| 43 |
+
- `separate_layer` = side push in `+x`
|
| 44 |
+
- Cloth success metric corrected:
|
| 45 |
+
- based on spoon displacement from its own hidden start plus visibility
|
| 46 |
+
- no longer credits success merely because the cloth flies away
|
| 47 |
+
|
| 48 |
+
## Important runner fixes already landed
|
| 49 |
+
|
| 50 |
+
- File:
|
| 51 |
+
- `/workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py`
|
| 52 |
+
- Fixed:
|
| 53 |
+
- cloth hidden-state initialization
|
| 54 |
+
- cloth seed filtering and split reuse via `episode_splits.json`
|
| 55 |
+
- `post_bundle` missing in cloth collect success check
|
| 56 |
+
- bridge smoke loss weights aligned to current `LossWeights`
|
| 57 |
+
- adapter trainable parameter prefixes aligned to working pickclutter runner
|
| 58 |
+
- zero-depth layout changed to channel-first
|
| 59 |
+
- cached dataset normalizer added for old channel-last depth tensors
|
| 60 |
+
|
| 61 |
+
## Live status when this note was written
|
| 62 |
+
|
| 63 |
+
- Bag process is complete.
|
| 64 |
+
- Cloth process is still collecting the train split in the original long-running session.
|
| 65 |
+
- The long-running cloth process was started before the later loss-weight and depth-layout fixes, so it is expected to finish collection and then crash at training start.
|
| 66 |
+
- After it writes `train.pt` and `val.pt`, restart cloth with:
|
| 67 |
+
|
| 68 |
+
```bash
|
| 69 |
+
python /workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py --task cloth --skip-collection
|
| 70 |
+
```
|
| 71 |
+
|
| 72 |
+
- If trunk checkpoint already exists by that point and only adapter needs rerun:
|
| 73 |
+
|
| 74 |
+
```bash
|
| 75 |
+
python /workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py --task cloth --skip-collection --reuse-checkpoints
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
## Cloth restart correction
|
| 79 |
+
|
| 80 |
+
- The corrected cloth restart reached adapter training and failed in rollout supervision because the cached cloth public proxy authored `7` candidate targets while the decoder always allocates `8` proposal slots.
|
| 81 |
+
- Fix landed in:
|
| 82 |
+
- `/workspace/workspace/VLAarchtests3_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual/eval/run_maniskill_bridge_retrieval_smoke.py`
|
| 83 |
+
- Correction:
|
| 84 |
+
- cached bridge samples now normalize channel-last depth tensors as before
|
| 85 |
+
- cached candidate-aligned tensors now also pad from `7 -> 8` slots before loading
|
| 86 |
+
- padding cycles the non-base candidates first, which preserves the collected cloth episodes and avoids recollection
|
| 87 |
+
- Verified locally before restart:
|
| 88 |
+
- normalized cloth `candidate_action_chunks` is `(8, 8, 14)`
|
| 89 |
+
- normalized cloth `candidate_rollout_support_mode` is `(8, 5)`
|
| 90 |
+
- one real `adapter_active_ft` training step and one real validation loss pass both completed without the previous shape error
|
| 91 |
+
|
| 92 |
+
## Cloth result
|
| 93 |
+
|
| 94 |
+
- Report directory:
|
| 95 |
+
- `/workspace/workspace/reports/maniskill_cloth_bridge_smoke_v1`
|
| 96 |
+
- Final cloth smoke summary:
|
| 97 |
+
- `trunk_only_ft = 0.04`
|
| 98 |
+
- `adapter_noop = 0.04`
|
| 99 |
+
- `adapter_active_ft = 0.10`
|
| 100 |
+
- `delta_active_vs_trunk = +0.06`
|
| 101 |
+
- `delta_active_vs_trunk_ci95 = [-0.04, 0.16]`
|
| 102 |
+
- `intervention_rate = 0.3369`
|
| 103 |
+
- `non_base_selection_rate = 0.2674`
|
| 104 |
+
- Interpretation:
|
| 105 |
+
- cloth proxy is positive and adapter-specific in this single-seed smoke because `adapter_noop` stayed flat while `adapter_active_ft` improved
|
| 106 |
+
- effect size is modest and not yet statistically clean in this smoke protocol
|
| 107 |
+
|
| 108 |
+
## Combined three-track proxy suite
|
| 109 |
+
|
| 110 |
+
- Combined report:
|
| 111 |
+
- `/workspace/workspace/reports/public_proxy_suite_smoke_v1/combined_summary.json`
|
| 112 |
+
- `/workspace/workspace/reports/public_proxy_suite_smoke_v1/combined_summary.md`
|
| 113 |
+
- Current three-track smoke evidence:
|
| 114 |
+
- occlusion proxy positive and adapter-specific
|
| 115 |
+
- bag proxy positive and adapter-specific
|
| 116 |
+
- cloth proxy positive and adapter-specific
|
code/VLAarchtests4_root/setup/public_benchmark/ENVIRONMENT.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Public Benchmark Environment Manifest
|
| 2 |
+
|
| 3 |
+
This file describes the environment used for the current ManiSkill public benchmark runs staged in `VLAarchtests4`.
|
| 4 |
+
|
| 5 |
+
## Hardware
|
| 6 |
+
|
| 7 |
+
- GPU: `NVIDIA L40S`
|
| 8 |
+
- VRAM: `46068 MiB`
|
| 9 |
+
- Driver: `580.126.09`
|
| 10 |
+
- Kernel: `Linux 6.8.0-88-generic`
|
| 11 |
+
- Python: `3.11.10`
|
| 12 |
+
|
| 13 |
+
Raw snapshots:
|
| 14 |
+
|
| 15 |
+
- `setup/public_benchmark/nvidia_smi.txt`
|
| 16 |
+
- `setup/public_benchmark/gpu_short.txt`
|
| 17 |
+
- `setup/public_benchmark/uname.txt`
|
| 18 |
+
- `setup/public_benchmark/python_version.txt`
|
| 19 |
+
|
| 20 |
+
## Python Packages
|
| 21 |
+
|
| 22 |
+
Package snapshots:
|
| 23 |
+
|
| 24 |
+
- current Python 3.11 env:
|
| 25 |
+
- `setup/public_benchmark/pip_freeze_python311.txt`
|
| 26 |
+
- prior RLBench env snapshot preserved for continuity:
|
| 27 |
+
- `setup/public_benchmark/rlbench_env_pip_freeze.txt`
|
| 28 |
+
|
| 29 |
+
Hugging Face CLI environment:
|
| 30 |
+
|
| 31 |
+
- `setup/public_benchmark/hf_env.txt`
|
| 32 |
+
|
| 33 |
+
## Runtime Variables For The Public Benchmark Stack
|
| 34 |
+
|
| 35 |
+
The ManiSkill public benchmark code paths set runtime variables equivalent to:
|
| 36 |
+
|
| 37 |
+
```bash
|
| 38 |
+
export VK_ICD_FILENAMES=/workspace/runtime/vulkan/icd.d/nvidia_icd_egl.json
|
| 39 |
+
export VK_LAYER_PATH=/workspace/runtime/vulkan/implicit_layer.d
|
| 40 |
+
export XDG_RUNTIME_DIR=/tmp/runtime-root
|
| 41 |
+
export MS_ASSET_DIR=/workspace/data/maniskill
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
The local project code path still needs:
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
export PYTHONPATH=/workspace/workspace/VLAarchtests4_export/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual:${PYTHONPATH:-}
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
Convenience helper:
|
| 51 |
+
|
| 52 |
+
- `setup/public_benchmark/env_vars.sh`
|
| 53 |
+
|
| 54 |
+
## Important Distinction From Older RLBench Stack
|
| 55 |
+
|
| 56 |
+
The older `VLAarchtests3` environment files under `setup/` are still preserved and remain relevant for:
|
| 57 |
+
|
| 58 |
+
- AnyBimanual
|
| 59 |
+
- RLBench
|
| 60 |
+
- CoppeliaSim / PyRep
|
| 61 |
+
|
| 62 |
+
The current public benchmark ManiSkill runs were primarily executed from the current Python 3.11 environment captured here, not from the older Python 3.10 RLBench stack described in `setup/ENVIRONMENT.md`.
|
| 63 |
+
|
| 64 |
+
## Caveats
|
| 65 |
+
|
| 66 |
+
- PickClutter used real `rgbd_3cam`.
|
| 67 |
+
- The bridge bag/cloth proxy runner used a compatibility shim that duplicated one RGB view across all camera slots and zero-filled depth.
|
| 68 |
+
- Reproducing those exact bridge results requires keeping that current runner behavior unchanged.
|
code/VLAarchtests4_root/setup/public_benchmark/env_vars.sh
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
set -euo pipefail
|
| 3 |
+
|
| 4 |
+
export VK_ICD_FILENAMES="${VK_ICD_FILENAMES:-/workspace/runtime/vulkan/icd.d/nvidia_icd_egl.json}"
|
| 5 |
+
export VK_LAYER_PATH="${VK_LAYER_PATH:-/workspace/runtime/vulkan/implicit_layer.d}"
|
| 6 |
+
export XDG_RUNTIME_DIR="${XDG_RUNTIME_DIR:-/tmp/runtime-root}"
|
| 7 |
+
export MS_ASSET_DIR="${MS_ASSET_DIR:-/workspace/data/maniskill}"
|
| 8 |
+
|
| 9 |
+
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
|
| 10 |
+
export PYTHONPATH="${ROOT_DIR}/code/VLAarchtests2_code/VLAarchtests/code/reveal_vla_bimanual:${PYTHONPATH:-}"
|
code/VLAarchtests4_root/setup/public_benchmark/gpu_short.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
NVIDIA L40S, 46068 MiB, 580.126.09
|
code/VLAarchtests4_root/setup/public_benchmark/hf_env.txt
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
Copy-and-paste the text below in your GitHub issue.
|
| 3 |
+
|
| 4 |
+
- huggingface_hub version: 1.8.0
|
| 5 |
+
- Platform: Linux-6.8.0-88-generic-x86_64-with-glibc2.35
|
| 6 |
+
- Python version: 3.11.10
|
| 7 |
+
- Running in iPython ?: No
|
| 8 |
+
- Running in notebook ?: No
|
| 9 |
+
- Running in Google Colab ?: No
|
| 10 |
+
- Running in Google Colab Enterprise ?: No
|
| 11 |
+
- Token path ?: /workspace/.cache/huggingface/token
|
| 12 |
+
- Has saved token ?: True
|
| 13 |
+
- Who am I ?: lsnu
|
| 14 |
+
- Configured git credential helpers:
|
| 15 |
+
- Installation method: hf_installer
|
| 16 |
+
- httpx: 0.28.1
|
| 17 |
+
- hf_xet: 1.4.2
|
| 18 |
+
- gradio: N/A
|
| 19 |
+
- tensorboard: N/A
|
| 20 |
+
- ENDPOINT: https://huggingface.co
|
| 21 |
+
- HF_HUB_CACHE: /workspace/.cache/huggingface/hub
|
| 22 |
+
- HF_ASSETS_CACHE: /workspace/.cache/huggingface/assets
|
| 23 |
+
- HF_TOKEN_PATH: /workspace/.cache/huggingface/token
|
| 24 |
+
- HF_STORED_TOKENS_PATH: /workspace/.cache/huggingface/stored_tokens
|
| 25 |
+
- HF_HUB_OFFLINE: False
|
| 26 |
+
- HF_HUB_DISABLE_TELEMETRY: False
|
| 27 |
+
- HF_HUB_DISABLE_PROGRESS_BARS: None
|
| 28 |
+
- HF_HUB_DISABLE_SYMLINKS_WARNING: False
|
| 29 |
+
- HF_HUB_DISABLE_EXPERIMENTAL_WARNING: False
|
| 30 |
+
- HF_HUB_DISABLE_IMPLICIT_TOKEN: False
|
| 31 |
+
- HF_HUB_DISABLE_XET: False
|
| 32 |
+
- HF_HUB_ETAG_TIMEOUT: 10
|
| 33 |
+
- HF_HUB_DOWNLOAD_TIMEOUT: 10
|
| 34 |
+
- HF_XET_HIGH_PERFORMANCE: False
|
| 35 |
+
|
code/VLAarchtests4_root/setup/public_benchmark/nvidia_smi.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Wed Apr 1 13:02:04 2026
|
| 2 |
+
+-----------------------------------------------------------------------------------------+
|
| 3 |
+
| NVIDIA-SMI 580.126.09 Driver Version: 580.126.09 CUDA Version: 13.0 |
|
| 4 |
+
+-----------------------------------------+------------------------+----------------------+
|
| 5 |
+
| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |
|
| 6 |
+
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
|
| 7 |
+
| | | MIG M. |
|
| 8 |
+
|=========================================+========================+======================|
|
| 9 |
+
| 0 NVIDIA L40S On | 00000000:05:00.0 Off | 0 |
|
| 10 |
+
| N/A 31C P8 34W / 350W | 54MiB / 46068MiB | 0% Default |
|
| 11 |
+
| | | N/A |
|
| 12 |
+
+-----------------------------------------+------------------------+----------------------+
|
| 13 |
+
|
| 14 |
+
+-----------------------------------------------------------------------------------------+
|
| 15 |
+
| Processes: |
|
| 16 |
+
| GPU GI CI PID Type Process name GPU Memory |
|
| 17 |
+
| ID ID Usage |
|
| 18 |
+
|=========================================================================================|
|
| 19 |
+
| 0 N/A N/A 76839 G /usr/lib/xorg/Xorg 37MiB |
|
| 20 |
+
| 0 N/A N/A 76839 G /usr/lib/xorg/Xorg 37MiB |
|
| 21 |
+
+-----------------------------------------------------------------------------------------+
|
code/VLAarchtests4_root/setup/public_benchmark/pip_freeze_python311.txt
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
absl-py==2.4.0
|
| 2 |
+
accelerate==1.13.0
|
| 3 |
+
annotated-doc==0.0.4
|
| 4 |
+
antlr4-python3-runtime==4.9.3
|
| 5 |
+
anyio==4.6.0
|
| 6 |
+
argon2-cffi==23.1.0
|
| 7 |
+
argon2-cffi-bindings==21.2.0
|
| 8 |
+
arm_pytorch_utilities==0.5.0
|
| 9 |
+
arrow==1.3.0
|
| 10 |
+
asttokens==2.4.1
|
| 11 |
+
async-lru==2.0.4
|
| 12 |
+
attrs==24.2.0
|
| 13 |
+
babel==2.16.0
|
| 14 |
+
beautifulsoup4==4.12.3
|
| 15 |
+
bleach==6.1.0
|
| 16 |
+
blinker==1.4
|
| 17 |
+
certifi==2024.8.30
|
| 18 |
+
cffi==1.17.1
|
| 19 |
+
charset-normalizer==3.3.2
|
| 20 |
+
click==8.3.1
|
| 21 |
+
cloudpickle==3.1.2
|
| 22 |
+
comm==0.2.2
|
| 23 |
+
contourpy==1.3.3
|
| 24 |
+
cryptography==3.4.8
|
| 25 |
+
cycler==0.12.1
|
| 26 |
+
dacite==1.9.2
|
| 27 |
+
dbus-python==1.2.18
|
| 28 |
+
debugpy==1.8.5
|
| 29 |
+
decorator==5.1.1
|
| 30 |
+
defusedxml==0.7.1
|
| 31 |
+
distro==1.7.0
|
| 32 |
+
docstring_parser==0.17.0
|
| 33 |
+
einops==0.8.2
|
| 34 |
+
entrypoints==0.4
|
| 35 |
+
executing==2.1.0
|
| 36 |
+
Farama-Notifications==0.0.4
|
| 37 |
+
fast_kinematics==0.2.2
|
| 38 |
+
fastjsonschema==2.20.0
|
| 39 |
+
filelock==3.13.1
|
| 40 |
+
fonttools==4.62.1
|
| 41 |
+
fqdn==1.5.1
|
| 42 |
+
fsspec==2024.2.0
|
| 43 |
+
gitdb==4.0.12
|
| 44 |
+
GitPython==3.1.46
|
| 45 |
+
grpcio==1.80.0
|
| 46 |
+
gymnasium==0.29.1
|
| 47 |
+
h11==0.14.0
|
| 48 |
+
h5py==3.16.0
|
| 49 |
+
hf-xet==1.4.3
|
| 50 |
+
httpcore==1.0.5
|
| 51 |
+
httplib2==0.20.2
|
| 52 |
+
httpx==0.27.2
|
| 53 |
+
huggingface_hub==1.8.0
|
| 54 |
+
hydra-core==1.3.2
|
| 55 |
+
idna==3.10
|
| 56 |
+
ImageIO==2.37.3
|
| 57 |
+
imageio-ffmpeg==0.6.0
|
| 58 |
+
importlib-metadata==4.6.4
|
| 59 |
+
importlib_resources==6.5.2
|
| 60 |
+
ipykernel==6.29.5
|
| 61 |
+
ipython==8.27.0
|
| 62 |
+
ipython-genutils==0.2.0
|
| 63 |
+
ipywidgets==8.1.5
|
| 64 |
+
isoduration==20.11.0
|
| 65 |
+
jedi==0.19.1
|
| 66 |
+
jeepney==0.7.1
|
| 67 |
+
Jinja2==3.1.3
|
| 68 |
+
json5==0.9.25
|
| 69 |
+
jsonpointer==3.0.0
|
| 70 |
+
jsonschema==4.23.0
|
| 71 |
+
jsonschema-specifications==2023.12.1
|
| 72 |
+
jupyter-archive==3.4.0
|
| 73 |
+
jupyter-events==0.10.0
|
| 74 |
+
jupyter-highlight-selected-word==0.2.0
|
| 75 |
+
jupyter-lsp==2.2.5
|
| 76 |
+
jupyter_client==7.4.9
|
| 77 |
+
jupyter_contrib_core==0.4.2
|
| 78 |
+
jupyter_contrib_nbextensions==0.7.0
|
| 79 |
+
jupyter_core==5.7.2
|
| 80 |
+
jupyter_nbextensions_configurator==0.6.4
|
| 81 |
+
jupyter_server==2.14.2
|
| 82 |
+
jupyter_server_terminals==0.5.3
|
| 83 |
+
jupyterlab==4.2.5
|
| 84 |
+
jupyterlab_pygments==0.3.0
|
| 85 |
+
jupyterlab_server==2.27.3
|
| 86 |
+
jupyterlab_widgets==3.0.13
|
| 87 |
+
keyring==23.5.0
|
| 88 |
+
kiwisolver==1.5.0
|
| 89 |
+
launchpadlib==1.10.16
|
| 90 |
+
lazr.restfulclient==0.14.4
|
| 91 |
+
lazr.uri==1.0.6
|
| 92 |
+
lxml==5.3.0
|
| 93 |
+
mani_skill==3.0.0b22
|
| 94 |
+
Markdown==3.10.2
|
| 95 |
+
markdown-it-py==4.0.0
|
| 96 |
+
MarkupSafe==2.1.5
|
| 97 |
+
matplotlib==3.10.8
|
| 98 |
+
matplotlib-inline==0.1.7
|
| 99 |
+
mdurl==0.1.2
|
| 100 |
+
mistune==3.0.2
|
| 101 |
+
more-itertools==8.10.0
|
| 102 |
+
mplib==0.1.1
|
| 103 |
+
mpmath==1.3.0
|
| 104 |
+
nbclassic==1.1.0
|
| 105 |
+
nbclient==0.10.0
|
| 106 |
+
nbconvert==7.16.4
|
| 107 |
+
nbformat==5.10.4
|
| 108 |
+
nest-asyncio==1.6.0
|
| 109 |
+
networkx==3.2.1
|
| 110 |
+
notebook==6.5.5
|
| 111 |
+
notebook_shim==0.2.4
|
| 112 |
+
numpy==2.4.4
|
| 113 |
+
nvidia-cublas-cu12==12.4.2.65
|
| 114 |
+
nvidia-cuda-cupti-cu12==12.4.99
|
| 115 |
+
nvidia-cuda-nvrtc-cu12==12.4.99
|
| 116 |
+
nvidia-cuda-runtime-cu12==12.4.99
|
| 117 |
+
nvidia-cudnn-cu12==9.1.0.70
|
| 118 |
+
nvidia-cufft-cu12==11.2.0.44
|
| 119 |
+
nvidia-curand-cu12==10.3.5.119
|
| 120 |
+
nvidia-cusolver-cu12==11.6.0.99
|
| 121 |
+
nvidia-cusparse-cu12==12.3.0.142
|
| 122 |
+
nvidia-ml-py==13.595.45
|
| 123 |
+
nvidia-nccl-cu12==2.20.5
|
| 124 |
+
nvidia-nvjitlink-cu12==12.4.99
|
| 125 |
+
nvidia-nvtx-cu12==12.4.99
|
| 126 |
+
oauthlib==3.2.0
|
| 127 |
+
omegaconf==2.3.0
|
| 128 |
+
opencv-python==4.13.0.92
|
| 129 |
+
overrides==7.7.0
|
| 130 |
+
packaging==24.1
|
| 131 |
+
pandas==3.0.2
|
| 132 |
+
pandocfilters==1.5.1
|
| 133 |
+
parso==0.8.4
|
| 134 |
+
pexpect==4.9.0
|
| 135 |
+
pillow==10.2.0
|
| 136 |
+
platformdirs==4.3.6
|
| 137 |
+
prometheus_client==0.21.0
|
| 138 |
+
prompt_toolkit==3.0.47
|
| 139 |
+
protobuf==7.34.1
|
| 140 |
+
psutil==6.0.0
|
| 141 |
+
ptyprocess==0.7.0
|
| 142 |
+
pure_eval==0.2.3
|
| 143 |
+
py-spy==0.4.1
|
| 144 |
+
pycparser==2.22
|
| 145 |
+
Pygments==2.18.0
|
| 146 |
+
PyGObject==3.42.1
|
| 147 |
+
PyJWT==2.3.0
|
| 148 |
+
pyparsing==3.3.2
|
| 149 |
+
pyperclip==1.11.0
|
| 150 |
+
python-apt==2.4.0+ubuntu4
|
| 151 |
+
python-dateutil==2.9.0.post0
|
| 152 |
+
python-json-logger==2.0.7
|
| 153 |
+
pytorch-kinematics==0.7.6
|
| 154 |
+
pytorch-seed==0.2.0
|
| 155 |
+
PyYAML==6.0.2
|
| 156 |
+
pyzmq==24.0.1
|
| 157 |
+
referencing==0.35.1
|
| 158 |
+
regex==2026.3.32
|
| 159 |
+
requests==2.32.3
|
| 160 |
+
rfc3339-validator==0.1.4
|
| 161 |
+
rfc3986-validator==0.1.1
|
| 162 |
+
rich==14.3.3
|
| 163 |
+
rpds-py==0.20.0
|
| 164 |
+
safetensors==0.7.0
|
| 165 |
+
sapien==3.0.3
|
| 166 |
+
scipy==1.17.1
|
| 167 |
+
SecretStorage==3.3.1
|
| 168 |
+
Send2Trash==1.8.3
|
| 169 |
+
shellingham==1.5.4
|
| 170 |
+
six==1.16.0
|
| 171 |
+
smmap==5.0.3
|
| 172 |
+
sniffio==1.3.1
|
| 173 |
+
soupsieve==2.6
|
| 174 |
+
stack-data==0.6.3
|
| 175 |
+
sympy==1.12
|
| 176 |
+
systemd-python==234
|
| 177 |
+
tabulate==0.10.0
|
| 178 |
+
tensorboard==2.20.0
|
| 179 |
+
tensorboard-data-server==0.7.2
|
| 180 |
+
terminado==0.18.1
|
| 181 |
+
timm==1.0.26
|
| 182 |
+
tinycss2==1.3.0
|
| 183 |
+
tokenizers==0.22.2
|
| 184 |
+
toppra==0.6.3
|
| 185 |
+
torch==2.4.1+cu124
|
| 186 |
+
torchaudio==2.4.1+cu124
|
| 187 |
+
torchvision==0.19.1+cu124
|
| 188 |
+
tornado==6.4.1
|
| 189 |
+
tqdm==4.67.3
|
| 190 |
+
traitlets==5.14.3
|
| 191 |
+
transformers==5.4.0
|
| 192 |
+
transforms3d==0.4.2
|
| 193 |
+
trimesh==4.11.5
|
| 194 |
+
triton==3.0.0
|
| 195 |
+
typeguard==4.5.1
|
| 196 |
+
typer==0.24.1
|
| 197 |
+
types-python-dateutil==2.9.0.20240906
|
| 198 |
+
typing_extensions==4.15.0
|
| 199 |
+
tyro==1.0.11
|
| 200 |
+
uri-template==1.3.0
|
| 201 |
+
urllib3==2.2.3
|
| 202 |
+
wadllib==1.3.6
|
| 203 |
+
wcwidth==0.2.13
|
| 204 |
+
webcolors==24.8.0
|
| 205 |
+
webencodings==0.5.1
|
| 206 |
+
websocket-client==1.8.0
|
| 207 |
+
Werkzeug==3.1.7
|
| 208 |
+
widgetsnbextension==4.0.13
|
| 209 |
+
zipp==1.0.0
|
code/VLAarchtests4_root/setup/public_benchmark/python_version.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Python 3.11.10
|
code/VLAarchtests4_root/setup/public_benchmark/rlbench_env_pip_freeze.txt
ADDED
|
@@ -0,0 +1,181 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
absl-py==2.1.0
|
| 2 |
+
accelerate==0.31.0
|
| 3 |
+
addict==2.4.0
|
| 4 |
+
aiohappyeyeballs==2.6.1
|
| 5 |
+
aiohttp==3.13.5
|
| 6 |
+
aiosignal==1.4.0
|
| 7 |
+
antlr4-python3-runtime==4.9.3
|
| 8 |
+
appdirs==1.4.4
|
| 9 |
+
asttokens==3.0.1
|
| 10 |
+
async-timeout==5.0.1
|
| 11 |
+
attrs==26.1.0
|
| 12 |
+
backports.zstd @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_backports.zstd_1767044984/work
|
| 13 |
+
blinker==1.9.0
|
| 14 |
+
blosc==1.11.4
|
| 15 |
+
Brotli @ file:///home/conda/feedstock_root/build_artifacts/brotli-split_1764016952863/work
|
| 16 |
+
cached-property @ file:///home/conda/feedstock_root/build_artifacts/cached_property_1615209429212/work
|
| 17 |
+
certifi @ file:///home/conda/feedstock_root/build_artifacts/certifi_1772001073725/work/certifi
|
| 18 |
+
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1761202865726/work
|
| 19 |
+
charset-normalizer @ file:///home/conda/feedstock_root/build_artifacts/charset-normalizer_1773659966602/work
|
| 20 |
+
click==8.3.1
|
| 21 |
+
click-prompt==0.5.1
|
| 22 |
+
clip @ git+https://github.com/openai/CLIP.git@d05afc436d78f1c48dc0dbf8e5980a9d471f35f6
|
| 23 |
+
cloudpickle==3.1.2
|
| 24 |
+
comm==0.2.3
|
| 25 |
+
ConfigArgParse==1.7.5
|
| 26 |
+
contourpy @ file:///home/conda/feedstock_root/build_artifacts/contourpy_1744743067588/work
|
| 27 |
+
cycler @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_cycler_1764466758/work
|
| 28 |
+
dash==4.1.0
|
| 29 |
+
decorator==5.2.1
|
| 30 |
+
docker-pycreds==0.4.0
|
| 31 |
+
einops==0.8.0
|
| 32 |
+
exceptiongroup==1.3.1
|
| 33 |
+
executing==2.2.1
|
| 34 |
+
Farama-Notifications==0.0.4
|
| 35 |
+
fastjsonschema==2.21.2
|
| 36 |
+
filelock @ file:///home/conda/feedstock_root/build_artifacts/filelock_1773313889543/work
|
| 37 |
+
Flask==3.1.3
|
| 38 |
+
fonttools @ file:///home/conda/feedstock_root/build_artifacts/fonttools_1773137064424/work
|
| 39 |
+
freetype-py==2.5.1
|
| 40 |
+
frozenlist==1.8.0
|
| 41 |
+
fsspec==2026.3.0
|
| 42 |
+
ftfy==6.2.0
|
| 43 |
+
gitdb==4.0.12
|
| 44 |
+
GitPython==3.1.46
|
| 45 |
+
gmpy2 @ file:///home/conda/feedstock_root/build_artifacts/gmpy2_1773244929835/work
|
| 46 |
+
grpcio==1.80.0
|
| 47 |
+
gym==0.26.2
|
| 48 |
+
gym-notices==0.1.0
|
| 49 |
+
gymnasium==1.0.0a2
|
| 50 |
+
h2 @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_h2_1756364871/work
|
| 51 |
+
h5py @ file:///home/conda/feedstock_root/build_artifacts/h5py_1774712049671/work
|
| 52 |
+
hf-xet==1.4.2
|
| 53 |
+
hpack @ file:///home/conda/feedstock_root/build_artifacts/hpack_1737618293087/work
|
| 54 |
+
huggingface_hub==0.36.2
|
| 55 |
+
hydra-core==1.3.2
|
| 56 |
+
hyperframe @ file:///home/conda/feedstock_root/build_artifacts/hyperframe_1737618333194/work
|
| 57 |
+
idna @ file:///home/conda/feedstock_root/build_artifacts/idna_1760286409563/work
|
| 58 |
+
imageio @ file:///home/conda/feedstock_root/build_artifacts/imageio_1738273805233/work
|
| 59 |
+
imageio-ffmpeg==0.6.0
|
| 60 |
+
importlib_metadata==9.0.0
|
| 61 |
+
iniconfig==2.3.0
|
| 62 |
+
ipython==8.39.0
|
| 63 |
+
ipywidgets==8.1.8
|
| 64 |
+
itsdangerous==2.2.0
|
| 65 |
+
jedi==0.19.2
|
| 66 |
+
Jinja2 @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_jinja2_1764517220/work
|
| 67 |
+
joblib==1.5.3
|
| 68 |
+
jsonschema==4.26.0
|
| 69 |
+
jsonschema-specifications==2025.9.1
|
| 70 |
+
jupyter_core==5.9.1
|
| 71 |
+
jupyterlab_widgets==3.0.16
|
| 72 |
+
kiwisolver @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_kiwisolver_1773067043/work
|
| 73 |
+
Markdown==3.10.2
|
| 74 |
+
markdown-it-py==4.0.0
|
| 75 |
+
MarkupSafe @ file:///home/conda/feedstock_root/build_artifacts/markupsafe_1772444934960/work
|
| 76 |
+
matplotlib @ file:///home/conda/feedstock_root/build_artifacts/matplotlib-suite_1715976200404/work
|
| 77 |
+
matplotlib-inline==0.2.1
|
| 78 |
+
mdurl==0.1.2
|
| 79 |
+
moviepy==2.2.1
|
| 80 |
+
mpmath @ file:///home/conda/feedstock_root/build_artifacts/mpmath_1773661943568/work
|
| 81 |
+
multidict==6.7.1
|
| 82 |
+
munkres==1.1.4
|
| 83 |
+
narwhals==2.18.1
|
| 84 |
+
natsort==8.4.0
|
| 85 |
+
nbformat==5.10.4
|
| 86 |
+
nest-asyncio==1.6.0
|
| 87 |
+
networkx @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_networkx_1731521053/work
|
| 88 |
+
numpy==1.26.4
|
| 89 |
+
omegaconf==2.3.0
|
| 90 |
+
open3d==0.19.0
|
| 91 |
+
openai==0.28.1
|
| 92 |
+
opencv-python==4.10.0.84
|
| 93 |
+
packaging @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_packaging_1769093650/work
|
| 94 |
+
pandas @ file:///home/conda/feedstock_root/build_artifacts/pandas_1744430447393/work
|
| 95 |
+
parso==0.8.6
|
| 96 |
+
pathtools==0.1.2
|
| 97 |
+
perceiver-pytorch==0.8.8
|
| 98 |
+
pexpect==4.9.0
|
| 99 |
+
pillow==12.1.1
|
| 100 |
+
platformdirs==4.9.4
|
| 101 |
+
plotly==6.6.0
|
| 102 |
+
pluggy==1.6.0
|
| 103 |
+
ply @ file:///home/conda/feedstock_root/build_artifacts/ply_1733239724146/work
|
| 104 |
+
poetry-core==2.3.2
|
| 105 |
+
proglog==0.1.12
|
| 106 |
+
prompt_toolkit==3.0.52
|
| 107 |
+
propcache==0.4.1
|
| 108 |
+
protobuf==4.25.9
|
| 109 |
+
psutil @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_psutil_1769678154/work
|
| 110 |
+
ptyprocess==0.7.0
|
| 111 |
+
pure_eval==0.2.3
|
| 112 |
+
py-spy==0.4.1
|
| 113 |
+
pycparser @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pycparser_1733195786/work
|
| 114 |
+
pyglet==2.1.13
|
| 115 |
+
Pygments==2.20.0
|
| 116 |
+
PyOpenGL==3.1.0
|
| 117 |
+
pyparsing @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pyparsing_1769003998/work
|
| 118 |
+
PyQt5==5.15.11
|
| 119 |
+
PyQt5_sip==12.17.0
|
| 120 |
+
pyquaternion==0.9.9
|
| 121 |
+
pyrender==0.1.45
|
| 122 |
+
-e git+https://github.com/markusgrotz/PyRep.git@b8bd1d7a3182adcd570d001649c0849047ebf197#egg=PyRep
|
| 123 |
+
PySocks @ file:///home/conda/feedstock_root/build_artifacts/pysocks_1733217236728/work
|
| 124 |
+
pytest==9.0.2
|
| 125 |
+
python-dateutil @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_python-dateutil_1751104122/work
|
| 126 |
+
python-dotenv==1.2.2
|
| 127 |
+
pytorch-lamb==1.0.0
|
| 128 |
+
pytz @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_pytz_1773679724/work
|
| 129 |
+
PyYAML @ file:///home/conda/feedstock_root/build_artifacts/pyyaml_1770223234623/work
|
| 130 |
+
referencing==0.37.0
|
| 131 |
+
regex==2024.5.15
|
| 132 |
+
requests @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_requests_1774894783/work
|
| 133 |
+
retrying==1.4.2
|
| 134 |
+
# Editable install with no version control (reveal-vla-bimanual==0.1.0)
|
| 135 |
+
-e /workspace/reveal_vla_bimanual
|
| 136 |
+
rich==13.9.4
|
| 137 |
+
rich-click==1.8.9
|
| 138 |
+
-e git+https://github.com/markusgrotz/RLBench.git@8af748c51287989294e00c9c670e3330a0e35ed5#egg=rlbench
|
| 139 |
+
rpds-py==0.30.0
|
| 140 |
+
safetensors==0.4.3
|
| 141 |
+
scikit-learn==1.7.2
|
| 142 |
+
scipy @ file:///home/conda/feedstock_root/build_artifacts/scipy-split_1716470219380/work/dist/scipy-1.13.1-cp310-cp310-linux_x86_64.whl#sha256=a4ff22b6dc27b61196be51695f53f9b0676e7c1bc564872b51fc3c41b79ae80b
|
| 143 |
+
segment-anything==1.0
|
| 144 |
+
sentry-sdk==2.57.0
|
| 145 |
+
setproctitle==1.3.7
|
| 146 |
+
sip @ file:///home/conda/feedstock_root/build_artifacts/sip_1759437834046/work
|
| 147 |
+
six @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_six_1753199211/work
|
| 148 |
+
smmap==5.0.3
|
| 149 |
+
stack-data==0.6.3
|
| 150 |
+
sympy @ file:///home/conda/feedstock_root/build_artifacts/sympy_1771952240620/work
|
| 151 |
+
tensorboard==2.16.2
|
| 152 |
+
tensorboard-data-server==0.7.2
|
| 153 |
+
tensorboardX==2.6.4
|
| 154 |
+
termcolor==3.3.0
|
| 155 |
+
threadpoolctl==3.6.0
|
| 156 |
+
timeout-decorator==0.5.0
|
| 157 |
+
timm==1.0.26
|
| 158 |
+
tokenizers==0.19.1
|
| 159 |
+
toml @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_toml_1764486833/work
|
| 160 |
+
tomli @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_tomli_1774492402/work
|
| 161 |
+
torch==2.3.1
|
| 162 |
+
torchaudio==2.3.1
|
| 163 |
+
torchvision==0.18.1
|
| 164 |
+
tornado @ file:///home/conda/feedstock_root/build_artifacts/tornado_1774357896577/work
|
| 165 |
+
tqdm @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_tqdm_1770153424/work
|
| 166 |
+
traitlets==5.14.3
|
| 167 |
+
transformers==4.41.2
|
| 168 |
+
transforms3d==0.4.1
|
| 169 |
+
trimesh @ file:///home/conda/feedstock_root/build_artifacts/trimesh_1774412449209/work
|
| 170 |
+
triton==2.3.1
|
| 171 |
+
typing_extensions @ file:///home/conda/feedstock_root/build_artifacts/bld/rattler-build_typing_extensions_1756220668/work
|
| 172 |
+
tzdata @ file:///home/conda/feedstock_root/build_artifacts/python-tzdata_1765719872007/work
|
| 173 |
+
unicodedata2 @ file:///home/conda/feedstock_root/build_artifacts/unicodedata2_1770908960326/work
|
| 174 |
+
urllib3 @ file:///home/conda/feedstock_root/build_artifacts/urllib3_1767817748113/work
|
| 175 |
+
wandb==0.14.0
|
| 176 |
+
wcwidth==0.2.14
|
| 177 |
+
Werkzeug==3.1.7
|
| 178 |
+
widgetsnbextension==4.0.15
|
| 179 |
+
yarl==1.23.0
|
| 180 |
+
-e git+https://github.com/markusgrotz/YARR.git@6822ff78602c77878b27d4cfe759ce029c67bffb#egg=yarr
|
| 181 |
+
zipp==3.23.0
|
code/VLAarchtests4_root/setup/public_benchmark/uname.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Linux 7bf60ec67db4 6.8.0-88-generic #89-Ubuntu SMP PREEMPT_DYNAMIC Sat Oct 11 01:02:46 UTC 2025 x86_64 x86_64 x86_64 GNU/Linux
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode12/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ea33aa5ffad718904206f80ce4b80a46d0d50d8242b7b7022e93125d22550b5
|
| 3 |
+
size 568456
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode15/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:30f868aeb7ed7dfc523de54a5f779b147f148eb383526a370b251f3b0b2b3d61
|
| 3 |
+
size 578384
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode18/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be1e51d36069f06804aa10d2cd2d67e1471bcf229f100b32e9e93bd4ba53e171
|
| 3 |
+
size 642658
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode20/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d7b1e987961292635111fa2bc9f318575a851087120bcd65451d055999eeea8
|
| 3 |
+
size 625748
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode27/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c8ee3826de9f18c91cc44e7e25c017acd647d6c23584ef4253ef63224b9d4ce
|
| 3 |
+
size 586435
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode34/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59920a1d954892e419f82ace03bd9a75d2d5597ed75e8cb112d44e26fbb6b0f7
|
| 3 |
+
size 605954
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode4/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9683a54ffe5bf2660a91d5ea25ac96d3733cc10f1e5cd6f573869c66f9d00327
|
| 3 |
+
size 592262
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode43/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5eafc24e297c12ed8e53dbb780a6c848b92780ba4ae14eb93a91076979e21c8
|
| 3 |
+
size 555743
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode49/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:297e0520f3a8862ff3e49c3eb3d7d05356a4e23f8ab2a95090737d21cbe5ec81
|
| 3 |
+
size 582075
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode50/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a4e903c529a6c62b374c4e00d25ab17d5f7ee2d33253e5ff8b4fed1350d2c041
|
| 3 |
+
size 596780
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode57/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f903fb5aac5bde8093b5e90f2d321c3128c30c19c21aaf4ad9cad76340d27717
|
| 3 |
+
size 601851
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode62/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:85a002f02f2ac7d4792db29a5e4e0dc23e1d29763b37b91776f1d69698e69539
|
| 3 |
+
size 563917
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode65/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:55a40ffa02b4b2700ffb9cf91ffaea97c0f2949dc7f50e00f38c257d0002737a
|
| 3 |
+
size 564743
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode68/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c7e5a1a4859e49fd0c20de588b290c9b14f26565b510dbbe7d2a25054576d2a
|
| 3 |
+
size 569596
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode71/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cf9ff624965a496b71aa6ab4ef885d9835b25ce03c47b80d4e9813845d62a1a
|
| 3 |
+
size 578764
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode76/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a89f9eeb584881cb5ac320db945ac62ff9176265b127e8ef4a10fc1d81f18f94
|
| 3 |
+
size 627864
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode81/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aad865c66bf34b71e47ebe37b4f804ec8b76dde37c5083107b23a13b52d8bbe1
|
| 3 |
+
size 578663
|
data/3dfa/peract2_test/bimanual_lift_tray/variation0/episodes/episode86/low_dim_obs.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff4e620c0c6547ebbae4420b6924b4e0e4e1f54e461443ea18d235637d20aa68
|
| 3 |
+
size 623542
|