| { |
| "type": "multi_task_dit", |
| "n_obs_steps": 2, |
| "input_features": { |
| "observation.images.image": { |
| "type": "VISUAL", |
| "shape": [ |
| 3, |
| 256, |
| 256 |
| ] |
| }, |
| "observation.images.image2": { |
| "type": "VISUAL", |
| "shape": [ |
| 3, |
| 256, |
| 256 |
| ] |
| }, |
| "observation.state": { |
| "type": "STATE", |
| "shape": [ |
| 8 |
| ] |
| } |
| }, |
| "output_features": { |
| "action": { |
| "type": "ACTION", |
| "shape": [ |
| 7 |
| ] |
| } |
| }, |
| "device": "cuda", |
| "use_amp": true, |
| "use_peft": false, |
| "push_to_hub": false, |
| "repo_id": null, |
| "private": null, |
| "tags": null, |
| "license": null, |
| "pretrained_path": null, |
| "horizon": 32, |
| "n_action_steps": 10, |
| "objective": "diffusion", |
| "noise_scheduler_type": "DDPM", |
| "num_train_timesteps": 100, |
| "beta_schedule": "squaredcos_cap_v2", |
| "beta_start": 0.0001, |
| "beta_end": 0.02, |
| "prediction_type": "epsilon", |
| "clip_sample": true, |
| "clip_sample_range": 1.0, |
| "num_inference_steps": null, |
| "sigma_min": 0.0, |
| "num_integration_steps": 100, |
| "integration_method": "euler", |
| "timestep_sampling_strategy": "beta", |
| "timestep_sampling_s": 0.999, |
| "timestep_sampling_alpha": 1.5, |
| "timestep_sampling_beta": 1.0, |
| "hidden_dim": 512, |
| "num_layers": 8, |
| "num_heads": 8, |
| "dropout": 0.1, |
| "modality_dropout_prob": 0.1, |
| "modality_weight_entropy_reg_weight": 0.0, |
| "modality_weight_balance_reg_weight": 0.0, |
| "use_positional_encoding": false, |
| "timestep_embed_dim": 128, |
| "use_rope": true, |
| "rope_base": 10000.0, |
| "qformer_num_queries": 8, |
| "qformer_hidden_dim": 512, |
| "qformer_num_layers": 2, |
| "qformer_num_heads": 8, |
| "qformer_dropout": 0.1, |
| "vision_encoder_name": "openai/clip-vit-base-patch16", |
| "use_separate_rgb_encoder_per_camera": false, |
| "vision_encoder_lr_multiplier": 0.1, |
| "image_resize_shape": [ |
| 256, |
| 256 |
| ], |
| "image_crop_shape": [ |
| 224, |
| 224 |
| ], |
| "image_crop_is_random": true, |
| "robot_state_embed_dim": 128, |
| "text_embed_dim": 128, |
| "text_encoder_name": "openai/clip-vit-base-patch16", |
| "tokenizer_max_length": 77, |
| "tokenizer_padding": "max_length", |
| "tokenizer_padding_side": "right", |
| "tokenizer_truncation": true, |
| "normalization_mapping": { |
| "VISUAL": "MEAN_STD", |
| "STATE": "MIN_MAX", |
| "ACTION": "MIN_MAX" |
| }, |
| "optimizer_lr": 0.0003, |
| "optimizer_betas": [ |
| 0.95, |
| 0.999 |
| ], |
| "optimizer_eps": 1e-08, |
| "optimizer_weight_decay": 1e-06, |
| "scheduler_name": "cosine", |
| "scheduler_warmup_steps": 500, |
| "do_mask_loss_for_padding": false, |
| "robust_adapter_training": false, |
| "robust_adapter_hidden_dim": 512, |
| "robust_adapter_num_queries": 4, |
| "robust_adapter_dropout": 0.0, |
| "robust_feature_loss_weight": 1.0, |
| "robust_action_loss_weight": 1.0, |
| "robust_clean_action_loss_weight": 0.0, |
| "robust_noise_std": 0.08, |
| "robust_patch_noise_prob": 0.5, |
| "robust_patch_noise_ratio": [ |
| 0.12, |
| 0.35 |
| ], |
| "robust_high_freq_prob": 0.5, |
| "robust_photometric_prob": 0.5, |
| "robust_brightness": 0.4, |
| "robust_contrast": 0.5, |
| "robust_saturation": 0.6, |
| "robust_hue": 0.15, |
| "robust_affine_prob": 0.5, |
| "robust_affine_max_degrees": 15.0, |
| "robust_affine_max_translate": 0.1, |
| "robust_affine_scale": [ |
| 0.9, |
| 1.1 |
| ], |
| "robust_vertical_flip_prob": 0.25, |
| "drop_n_last_frames": 21 |
| } |