{ "dataset": { "repo_id": "robot-learning-group47/eval2_compositional_augmented", "root": "/home/shadeform/.cache/lerobot/robot-learning-group47/eval2_compositional_augmented", "episodes": null, "image_transforms": { "enable": true, "max_num_transforms": 3, "random_order": false, "tfs": { "brightness": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "brightness": [ 0.7, 1.3 ] } }, "contrast": { "weight": 1.0, "type": "ColorJitter", "kwargs": { "contrast": [ 0.7, 1.3 ] } }, "sharpness": { "weight": 0.5, "type": "SharpnessJitter", "kwargs": { "sharpness": [ 0.5, 1.5 ] } } } }, "revision": "main", "use_imagenet_stats": true, "video_backend": "pyav", "streaming": false }, "env": null, "policy": { "type": "smolvla", "n_obs_steps": 1, "input_features": { "observation.state": { "type": "STATE", "shape": [ 6 ] }, "observation.images.camera1": { "type": "VISUAL", "shape": [ 3, 256, 256 ] }, "observation.images.camera2": { "type": "VISUAL", "shape": [ 3, 256, 256 ] }, "observation.images.camera3": { "type": "VISUAL", "shape": [ 3, 256, 256 ] }, "observation.images.empty_camera_0": { "type": "VISUAL", "shape": [ 3, 480, 640 ] }, "observation.images.empty_camera_1": { "type": "VISUAL", "shape": [ 3, 480, 640 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 6 ] } }, "device": "cuda", "use_amp": true, "use_peft": true, "push_to_hub": true, "repo_id": "robot-learning-group47/smolvla-eval2-lora-vqa-eval1expert", "private": null, "tags": null, "license": null, "pretrained_path": "/home/shadeform/outputs/smolvla-eval2-pathC-init", "chunk_size": 50, "n_action_steps": 50, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "MEAN_STD", "ACTION": "MEAN_STD" }, "max_state_dim": 32, "max_action_dim": 32, "resize_imgs_with_padding": [ 256, 256 ], "empty_cameras": 2, "adapt_to_pi_aloha": false, "use_delta_joint_actions_aloha": false, "tokenizer_max_length": 48, "num_steps": 10, "use_cache": true, "freeze_vision_encoder": true, "train_expert_only": false, "train_state_proj": true, "optimizer_lr": 0.0001, "optimizer_betas": [ 0.9, 0.95 ], "optimizer_eps": 1e-08, "optimizer_weight_decay": 1e-10, "optimizer_grad_clip_norm": 10.0, "scheduler_warmup_steps": 1000, "scheduler_decay_steps": 30000, "scheduler_decay_lr": 2.5e-06, "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", "load_vlm_weights": true, "add_image_special_tokens": false, "attention_mode": "cross_attn", "prefix_length": 0, "pad_language_to": "max_length", "num_expert_layers": 0, "num_vlm_layers": 16, "self_attn_every_n_layers": 2, "expert_width_multiplier": 0.75, "min_period": 0.004, "max_period": 4.0, "rtc_config": null, "compile_model": false, "compile_mode": "max-autotune" }, "output_dir": "/home/shadeform/outputs/smolvla-eval2-lora-vqa-eval1expert", "job_name": "smolvla-eval2-lora-vqa-eval1expert", "resume": false, "seed": 1000, "num_workers": 8, "batch_size": 32, "steps": 40000, "eval_freq": 40000, "log_freq": 50, "tolerance_s": 0.0001, "save_checkpoint": true, "save_freq": 2000, "use_policy_training_preset": false, "optimizer": { "type": "adamw", "lr": 5e-06, "weight_decay": 1e-10, "grad_clip_norm": 10.0, "betas": [ 0.9, 0.95 ], "eps": 1e-08 }, "scheduler": { "type": "cosine_decay_with_warmup", "num_warmup_steps": 1000, "num_decay_steps": 40000, "peak_lr": 5e-06, "decay_lr": 5e-08 }, "eval": { "n_episodes": 50, "batch_size": 50, "use_async_envs": false }, "wandb": { "enable": true, "disable_artifact": false, "project": "lerobot", "entity": null, "notes": null, "run_id": "6rwjph6b", "mode": null }, "peft": { "target_modules": "model\\.vlm_with_expert\\.vlm\\.model\\.text_model\\.layers\\.[0-9]+\\.self_attn\\.(q|v)_proj", "full_training_modules": [ "lm_expert", "state_proj", "action_in_proj", "action_out_proj", "action_time_mlp_in", "action_time_mlp_out" ], "method_type": "lora", "init_type": null, "r": 16 }, "use_rabc": false, "rabc_progress_path": null, "rabc_kappa": 0.01, "rabc_epsilon": 1e-06, "rabc_head_mode": "sparse", "rename_map": { "observation.images.front": "observation.images.camera1" }, "checkpoint_path": null }