| { |
| "dataset": { |
| "repo_id": "HuggingFaceVLA/libero", |
| "root": null, |
| "episodes": null, |
| "image_transforms": { |
| "enable": false, |
| "max_num_transforms": 3, |
| "random_order": false, |
| "tfs": { |
| "brightness": { |
| "weight": 1.0, |
| "type": "ColorJitter", |
| "kwargs": { |
| "brightness": [ |
| 0.8, |
| 1.2 |
| ] |
| } |
| }, |
| "contrast": { |
| "weight": 1.0, |
| "type": "ColorJitter", |
| "kwargs": { |
| "contrast": [ |
| 0.8, |
| 1.2 |
| ] |
| } |
| }, |
| "saturation": { |
| "weight": 1.0, |
| "type": "ColorJitter", |
| "kwargs": { |
| "saturation": [ |
| 0.5, |
| 1.5 |
| ] |
| } |
| }, |
| "hue": { |
| "weight": 1.0, |
| "type": "ColorJitter", |
| "kwargs": { |
| "hue": [ |
| -0.05, |
| 0.05 |
| ] |
| } |
| }, |
| "sharpness": { |
| "weight": 1.0, |
| "type": "SharpnessJitter", |
| "kwargs": { |
| "sharpness": [ |
| 0.5, |
| 1.5 |
| ] |
| } |
| }, |
| "affine": { |
| "weight": 1.0, |
| "type": "RandomAffine", |
| "kwargs": { |
| "degrees": [ |
| -5.0, |
| 5.0 |
| ], |
| "translate": [ |
| 0.05, |
| 0.05 |
| ] |
| } |
| } |
| } |
| }, |
| "revision": null, |
| "use_imagenet_stats": true, |
| "video_backend": "torchcodec", |
| "streaming": false |
| }, |
| "env": { |
| "type": "libero", |
| "task": "libero_spatial,libero_object,libero_goal,libero_10", |
| "fps": 30, |
| "features": { |
| "action": { |
| "type": "ACTION", |
| "shape": [ |
| 7 |
| ] |
| }, |
| "pixels/agentview_image": { |
| "type": "VISUAL", |
| "shape": [ |
| 360, |
| 360, |
| 3 |
| ] |
| }, |
| "pixels/robot0_eye_in_hand_image": { |
| "type": "VISUAL", |
| "shape": [ |
| 360, |
| 360, |
| 3 |
| ] |
| }, |
| "robot_state/eef/pos": { |
| "type": "STATE", |
| "shape": [ |
| 3 |
| ] |
| }, |
| "robot_state/eef/quat": { |
| "type": "STATE", |
| "shape": [ |
| 4 |
| ] |
| }, |
| "robot_state/eef/mat": { |
| "type": "STATE", |
| "shape": [ |
| 3, |
| 3 |
| ] |
| }, |
| "robot_state/gripper/qpos": { |
| "type": "STATE", |
| "shape": [ |
| 2 |
| ] |
| }, |
| "robot_state/gripper/qvel": { |
| "type": "STATE", |
| "shape": [ |
| 2 |
| ] |
| }, |
| "robot_state/joints/pos": { |
| "type": "STATE", |
| "shape": [ |
| 7 |
| ] |
| }, |
| "robot_state/joints/vel": { |
| "type": "STATE", |
| "shape": [ |
| 7 |
| ] |
| } |
| }, |
| "features_map": { |
| "action": "action", |
| "robot_state/eef/pos": "observation.state.eef_pos", |
| "robot_state/eef/quat": "observation.state.eef_quat", |
| "robot_state/eef/mat": "observation.state.eef_mat", |
| "robot_state/gripper/qpos": "observation.state.gripper_qpos", |
| "robot_state/gripper/qvel": "observation.state.gripper_qvel", |
| "robot_state/joints/pos": "observation.state.joint_pos", |
| "robot_state/joints/vel": "observation.state.joint_vel", |
| "pixels/agentview_image": "observation.images.image", |
| "pixels/robot0_eye_in_hand_image": "observation.images.image2" |
| }, |
| "max_parallel_tasks": 1, |
| "disable_env_checker": true, |
| "task_ids": null, |
| "episode_length": null, |
| "obs_type": "pixels_agent_pos", |
| "render_mode": "rgb_array", |
| "camera_name": "agentview_image,robot0_eye_in_hand_image", |
| "init_states": true, |
| "camera_name_mapping": null, |
| "observation_height": 360, |
| "observation_width": 360, |
| "control_mode": "relative" |
| }, |
| "policy": { |
| "type": "multi_task_dit", |
| "n_obs_steps": 2, |
| "input_features": { |
| "observation.images.image": { |
| "type": "VISUAL", |
| "shape": [ |
| 3, |
| 256, |
| 256 |
| ] |
| }, |
| "observation.images.image2": { |
| "type": "VISUAL", |
| "shape": [ |
| 3, |
| 256, |
| 256 |
| ] |
| }, |
| "observation.state": { |
| "type": "STATE", |
| "shape": [ |
| 8 |
| ] |
| } |
| }, |
| "output_features": { |
| "action": { |
| "type": "ACTION", |
| "shape": [ |
| 7 |
| ] |
| } |
| }, |
| "device": "cuda", |
| "use_amp": true, |
| "use_peft": false, |
| "push_to_hub": false, |
| "repo_id": null, |
| "private": null, |
| "tags": null, |
| "license": null, |
| "pretrained_path": null, |
| "horizon": 32, |
| "n_action_steps": 10, |
| "objective": "diffusion", |
| "noise_scheduler_type": "DDPM", |
| "num_train_timesteps": 100, |
| "beta_schedule": "squaredcos_cap_v2", |
| "beta_start": 0.0001, |
| "beta_end": 0.02, |
| "prediction_type": "epsilon", |
| "clip_sample": true, |
| "clip_sample_range": 1.0, |
| "num_inference_steps": null, |
| "sigma_min": 0.0, |
| "num_integration_steps": 100, |
| "integration_method": "euler", |
| "timestep_sampling_strategy": "beta", |
| "timestep_sampling_s": 0.999, |
| "timestep_sampling_alpha": 1.5, |
| "timestep_sampling_beta": 1.0, |
| "hidden_dim": 512, |
| "num_layers": 8, |
| "num_heads": 8, |
| "dropout": 0.1, |
| "modality_dropout_prob": 0.1, |
| "modality_weight_entropy_reg_weight": 0.0, |
| "modality_weight_balance_reg_weight": 0.0, |
| "use_positional_encoding": false, |
| "timestep_embed_dim": 128, |
| "use_rope": true, |
| "rope_base": 10000.0, |
| "qformer_num_queries": 8, |
| "qformer_hidden_dim": 512, |
| "qformer_num_layers": 2, |
| "qformer_num_heads": 8, |
| "qformer_dropout": 0.1, |
| "vision_encoder_name": "openai/clip-vit-base-patch16", |
| "use_separate_rgb_encoder_per_camera": false, |
| "vision_encoder_lr_multiplier": 0.1, |
| "image_resize_shape": [ |
| 256, |
| 256 |
| ], |
| "image_crop_shape": [ |
| 224, |
| 224 |
| ], |
| "image_crop_is_random": true, |
| "robot_state_embed_dim": 128, |
| "text_embed_dim": 128, |
| "text_encoder_name": "openai/clip-vit-base-patch16", |
| "tokenizer_max_length": 77, |
| "tokenizer_padding": "max_length", |
| "tokenizer_padding_side": "right", |
| "tokenizer_truncation": true, |
| "normalization_mapping": { |
| "VISUAL": "MEAN_STD", |
| "STATE": "MIN_MAX", |
| "ACTION": "MIN_MAX" |
| }, |
| "optimizer_lr": 0.0003, |
| "optimizer_betas": [ |
| 0.95, |
| 0.999 |
| ], |
| "optimizer_eps": 1e-08, |
| "optimizer_weight_decay": 1e-06, |
| "scheduler_name": "cosine", |
| "scheduler_warmup_steps": 500, |
| "do_mask_loss_for_padding": false, |
| "robust_adapter_training": false, |
| "robust_adapter_hidden_dim": 512, |
| "robust_adapter_num_queries": 4, |
| "robust_adapter_dropout": 0.0, |
| "robust_feature_loss_weight": 1.0, |
| "robust_action_loss_weight": 1.0, |
| "robust_clean_action_loss_weight": 0.0, |
| "robust_noise_std": 0.08, |
| "robust_patch_noise_prob": 0.5, |
| "robust_patch_noise_ratio": [ |
| 0.12, |
| 0.35 |
| ], |
| "robust_high_freq_prob": 0.5, |
| "robust_photometric_prob": 0.5, |
| "robust_brightness": 0.4, |
| "robust_contrast": 0.5, |
| "robust_saturation": 0.6, |
| "robust_hue": 0.15, |
| "robust_affine_prob": 0.5, |
| "robust_affine_max_degrees": 15.0, |
| "robust_affine_max_translate": 0.1, |
| "robust_affine_scale": [ |
| 0.9, |
| 1.1 |
| ], |
| "robust_vertical_flip_prob": 0.25, |
| "drop_n_last_frames": 21 |
| }, |
| "output_dir": "outputs/multi_task_dit/libero-all/multi_task_dit-bs64-asteps10-300k", |
| "job_name": "multi_task_dit-bs64-asteps10-300k", |
| "resume": false, |
| "seed": 1000, |
| "cudnn_deterministic": false, |
| "num_workers": 12, |
| "batch_size": 64, |
| "steps": 300000, |
| "eval_freq": 300000, |
| "log_freq": 100, |
| "tolerance_s": 0.0001, |
| "save_checkpoint": true, |
| "save_freq": 50000, |
| "use_policy_training_preset": true, |
| "optimizer": { |
| "type": "adam", |
| "lr": 0.0003, |
| "weight_decay": 1e-06, |
| "grad_clip_norm": 10.0, |
| "betas": [ |
| 0.95, |
| 0.999 |
| ], |
| "eps": 1e-08 |
| }, |
| "scheduler": { |
| "type": "diffuser", |
| "num_warmup_steps": 500, |
| "name": "cosine" |
| }, |
| "eval": { |
| "n_episodes": 10, |
| "batch_size": 1, |
| "use_async_envs": false |
| }, |
| "wandb": { |
| "enable": true, |
| "disable_artifact": false, |
| "project": "libero-all", |
| "entity": "li-gen", |
| "notes": null, |
| "run_id": "multi_task_dit-bs64-asteps10-300k", |
| "mode": "offline", |
| "add_tags": true |
| }, |
| "peft": null, |
| "use_rabc": false, |
| "rabc_progress_path": null, |
| "rabc_kappa": 0.01, |
| "rabc_epsilon": 1e-06, |
| "rabc_head_mode": "sparse", |
| "rename_map": {}, |
| "checkpoint_path": null |
| } |