{
    "type": "lbm_policy",
    "chunk_size": 16,
    "n_action_steps": 8,
    "n_obs_steps": 2,
    "freq": 10.0,
    "use_relative_action": true,
    "use_relative_pose": false,
    "normalization_mapping": {
        "visual": "min_max",
        "state": "min_max",
        "action_pose_gripper": "min_max"
    },
    "input_features": {
        "observation_images_left_wrist": {
            "shape": [
                3,
                224,
                224
            ],
            "dtype": "visual",
            "fps": null,
            "rep": null,
            "rep_kwargs": null,
            "compose": null
        },
        "observation_images_right_wrist": {
            "shape": [
                3,
                224,
                224
            ],
            "dtype": "visual",
            "fps": null,
            "rep": null,
            "rep_kwargs": null,
            "compose": null
        },
        "observation_images_head": {
            "shape": [
                3,
                224,
                224
            ],
            "dtype": "visual",
            "fps": null,
            "rep": null,
            "rep_kwargs": null,
            "compose": null
        },
        "observation_images_chest": {
            "shape": [
                3,
                224,
                224
            ],
            "dtype": "visual",
            "fps": null,
            "rep": null,
            "rep_kwargs": null,
            "compose": null
        },
        "observation_states": {
            "shape": [
                34
            ],
            "dtype": "state",
            "fps": null,
            "rep": null,
            "rep_kwargs": null,
            "compose": {
                "observation_states_ee_pose_left": {
                    "shape": [
                        9
                    ],
                    "dtype": "state",
                    "fps": null,
                    "rep": "rotation_6d",
                    "rep_kwargs": null,
                    "compose": null
                },
                "observation_states_joint_angle_left": {
                    "shape": [
                        7
                    ],
                    "dtype": "state",
                    "fps": null,
                    "rep": null,
                    "rep_kwargs": null,
                    "compose": null
                },
                "observation_states_gripper_left": {
                    "shape": [
                        1
                    ],
                    "dtype": "state",
                    "fps": null,
                    "rep": null,
                    "rep_kwargs": null,
                    "compose": null
                },
                "observation_states_ee_pose_right": {
                    "shape": [
                        9
                    ],
                    "dtype": "state",
                    "fps": null,
                    "rep": "rotation_6d",
                    "rep_kwargs": null,
                    "compose": null
                },
                "observation_states_joint_angle_right": {
                    "shape": [
                        7
                    ],
                    "dtype": "state",
                    "fps": null,
                    "rep": null,
                    "rep_kwargs": null,
                    "compose": null
                },
                "observation_states_gripper_right": {
                    "shape": [
                        1
                    ],
                    "dtype": "state",
                    "fps": null,
                    "rep": null,
                    "rep_kwargs": null,
                    "compose": null
                }
            }
        }
    },
    "output_features": {
        "action": {
            "shape": [
                20
            ],
            "dtype": "action_pose_gripper",
            "fps": null,
            "rep": "rotation_6d",
            "rep_kwargs": null,
            "compose": {
                "action_left": {
                    "shape": [
                        10
                    ],
                    "dtype": "action_pose_gripper",
                    "fps": null,
                    "rep": "rotation_6d",
                    "rep_kwargs": null,
                    "compose": null
                },
                "action_right": {
                    "shape": [
                        10
                    ],
                    "dtype": "action_pose_gripper",
                    "fps": null,
                    "rep": "rotation_6d",
                    "rep_kwargs": null,
                    "compose": null
                }
            }
        }
    },
    "device": "cuda",
    "use_amp": false,
    "mixed_precision": null,
    "data_transform_override": null,
    "max_action_dim": 20,
    "share_obs_encoder": true,
    "obs_encoder_group": null,
    "noise_scheduler": {
        "type": "fm_beta",
        "num_train_timesteps": 10,
        "alpha": 1.5,
        "beta": 1.0,
        "s": 0.999,
        "clip_sample": false,
        "clip_sample_range": 1.0,
        "flow_sig_min": 0.0,
        "num_inference_steps": 10,
        "prediction_type": "velocity",
        "path_reversed": false
    },
    "vision_encoder": {
        "type": "clip_hf_vision_encoder",
        "model_name": "openai/clip-vit-base-patch16",
        "tune_vision_encoder": true,
        "extract_cls_token": true,
        "output_projection_dim": null
    },
    "text_encoder": {
        "type": "clip_hf_text_encoder",
        "model_name": "openai/clip-vit-base-patch32",
        "tune_text_encoder": false,
        "tune_projection_layer": true,
        "output_projection_dim": 768,
        "extract_eos_token": true
    },
    "action_head": {
        "type": "lbm_action_head",
        "noise_scheduler": {
            "type": "flow_matching_beta",
            "num_inference_steps": 10,
            "alpha": 1.5,
            "beta": 1.0,
            "s": 0.999,
            "input_perturb": 0.0,
            "sampling_path": "euler",
            "clip_sample": false,
            "clip_sample_range": null,
            "path_reversed": false
        },
        "num_layers": 16,
        "hidden_size": 768,
        "num_attention_heads": 16,
        "dropout": 0.1,
        "final_dropout": true,
        "max_timestep_buckets": 1000,
        "conditioning_dim": 6980,
        "max_state_dim": 34,
        "max_action_dim": 20,
        "max_chunk_size": 16
    },
    "proprioception_dim": 34,
    "optimizer_lr": 0.0001,
    "optimizer_betas": [
        0.9,
        0.95
    ],
    "optimizer_eps": 1e-08,
    "optimizer_weight_decay": 1e-06,
    "optimizer_vision_lr": 0.0001,
    "optimizer_vision_weight_decay": 1e-06,
    "scheduler_name": "cosine",
    "scheduler_warmup_steps": 500
}