diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..67cc6086cf16f2b1383f6d797bb7b67cc13b7445 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +wandb/run-20260518_165501-vjy2hgq7/run-vjy2hgq7.wandb filter=lfs diff=lfs merge=lfs -text diff --git a/checkpoints/001000/pretrained_model/config.json b/checkpoints/001000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/001000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/001000/pretrained_model/model.safetensors b/checkpoints/001000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9c96ba51bb31cd564985ddacfbc2e8aea897282e --- /dev/null +++ b/checkpoints/001000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e981e10b53f98da2aa0e6fe4597682856d989abb39ed2834910fce38f517b909 +size 1197789224 diff --git a/checkpoints/001000/pretrained_model/policy_postprocessor.json b/checkpoints/001000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/001000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/001000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/001000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/001000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/001000/pretrained_model/policy_preprocessor.json b/checkpoints/001000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/001000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/001000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/001000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/001000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/001000/pretrained_model/train_config.json b/checkpoints/001000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/001000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/001000/training_state/optimizer_param_groups.json b/checkpoints/001000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..55a3bdb608cc32f739cd274a57cb1bc4515eeaef --- /dev/null +++ b/checkpoints/001000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 9.973294239920334e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/001000/training_state/optimizer_state.safetensors b/checkpoints/001000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6ed88261624ecf2d6f53ac6a4d97923d31d089c7 --- /dev/null +++ b/checkpoints/001000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5b61883189e01426eceab3addfbb662cf4d861c232a11068016457e472c1f05 +size 412659164 diff --git a/checkpoints/001000/training_state/rng_state.safetensors b/checkpoints/001000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eac85a39b2f05cee1dc580b57d66cad0d83652f2 --- /dev/null +++ b/checkpoints/001000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:460a2a3799e009fa4904dd8bcce8b21c57e38ace4ea203fbdad0d7fa2dda93a2 +size 15708 diff --git a/checkpoints/001000/training_state/scheduler_state.json b/checkpoints/001000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..2de0149e43cc16e9c30892b8eea4be66f436da18 --- /dev/null +++ b/checkpoints/001000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 1000, + "_step_count": 1001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.973294239920334e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/001000/training_state/training_step.json b/checkpoints/001000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..d98c94f5b78238bf495ac68b9f9fb446cfac5c07 --- /dev/null +++ b/checkpoints/001000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 1000 +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/config.json b/checkpoints/002000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/002000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/model.safetensors b/checkpoints/002000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75f732c1b6a38914368bfa4fdd1c4140ebb1c65c --- /dev/null +++ b/checkpoints/002000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:154dcb2cb4d55b3bcda9e7c1b692841f9beb2f5e984a17f8a416332dbc20290f +size 1197789224 diff --git a/checkpoints/002000/pretrained_model/policy_postprocessor.json b/checkpoints/002000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/002000/pretrained_model/policy_preprocessor.json b/checkpoints/002000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/002000/pretrained_model/train_config.json b/checkpoints/002000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/002000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/002000/training_state/optimizer_param_groups.json b/checkpoints/002000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..d463af8441e65d055227b965941c5df4dbdcd282 --- /dev/null +++ b/checkpoints/002000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 9.893469553577303e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/002000/training_state/optimizer_state.safetensors b/checkpoints/002000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2894ec1e5a6e00a6d26e935298bcc6dfe063b42d --- /dev/null +++ b/checkpoints/002000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d109b71739a30059f576edba9d01e318c8730764483f3e2e92fb9fec5433228 +size 412659164 diff --git a/checkpoints/002000/training_state/rng_state.safetensors b/checkpoints/002000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a6b9921c5d7bd801bbf5307d77890494ed5db89a --- /dev/null +++ b/checkpoints/002000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32e047154d9fd7f0713365dc5499ab98f0d76677f3662511fe25665fad9d4323 +size 15708 diff --git a/checkpoints/002000/training_state/scheduler_state.json b/checkpoints/002000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1c7653e306023b5aa9a3823ac7b9f430e5116428 --- /dev/null +++ b/checkpoints/002000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 2000, + "_step_count": 2001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.893469553577303e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/002000/training_state/training_step.json b/checkpoints/002000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..39090bbb986edb821e1602990d19357dcdb5d2ae --- /dev/null +++ b/checkpoints/002000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 2000 +} \ No newline at end of file diff --git a/checkpoints/003000/pretrained_model/config.json b/checkpoints/003000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/003000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/003000/pretrained_model/model.safetensors b/checkpoints/003000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43957e1c31df5d33001ef9fbae292adb87a0259f --- /dev/null +++ b/checkpoints/003000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e560ca62b5263e1c03a2b5ca57edda76053c6736c5e8ec16edce792590ecd0f +size 1197789224 diff --git a/checkpoints/003000/pretrained_model/policy_postprocessor.json b/checkpoints/003000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/003000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/003000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/003000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/003000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/003000/pretrained_model/policy_preprocessor.json b/checkpoints/003000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/003000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/003000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/003000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/003000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/003000/pretrained_model/train_config.json b/checkpoints/003000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/003000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/003000/training_state/optimizer_param_groups.json b/checkpoints/003000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..45fc40f0a4f1a6e97775858d8603792f63b42abc --- /dev/null +++ b/checkpoints/003000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 9.761400516938874e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/003000/training_state/optimizer_state.safetensors b/checkpoints/003000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4ca2fc69fdd7fed97089e2480324e8ee79f947f --- /dev/null +++ b/checkpoints/003000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3eb3de70a0f6fe677bef6b02390bbcf1bf7863e59277cce8dbe89233c2c1d1 +size 412659164 diff --git a/checkpoints/003000/training_state/rng_state.safetensors b/checkpoints/003000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..acd037ae682b2b0c6d24e146b90ac28c6ff2e263 --- /dev/null +++ b/checkpoints/003000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d738f0e044604dbdba8a01fbea9112c3077a2b32cc4f01d0189310955588c00 +size 15708 diff --git a/checkpoints/003000/training_state/scheduler_state.json b/checkpoints/003000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3e3dd6d95c8a314cffc649fca389e912a1d89908 --- /dev/null +++ b/checkpoints/003000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 3000, + "_step_count": 3001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.761400516938874e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/003000/training_state/training_step.json b/checkpoints/003000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a44127b7dfea653fd776d529fa83c55d32081c --- /dev/null +++ b/checkpoints/003000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 3000 +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/config.json b/checkpoints/004000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/004000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/model.safetensors b/checkpoints/004000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d193f69377da1d59eb8bad4ff3d4448bff5087d --- /dev/null +++ b/checkpoints/004000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e7049dd1a2babcb4167ab125f94eaaf88fe8c471a6e3a30abac6051592f9263 +size 1197789224 diff --git a/checkpoints/004000/pretrained_model/policy_postprocessor.json b/checkpoints/004000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/004000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/004000/pretrained_model/policy_preprocessor.json b/checkpoints/004000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/004000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/004000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/004000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/004000/pretrained_model/train_config.json b/checkpoints/004000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/004000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/004000/training_state/optimizer_param_groups.json b/checkpoints/004000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..f2a89a9d735b293821e76e0cf5e56f2803ab85ef --- /dev/null +++ b/checkpoints/004000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 9.578534106007679e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/004000/training_state/optimizer_state.safetensors b/checkpoints/004000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5b95c0329238bc22ffde1b56f17fd803782aaac5 --- /dev/null +++ b/checkpoints/004000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d6ca22d75bba31a88f9002572a6dbb1c0025c5736f34b87384bf2c7725904f +size 412659164 diff --git a/checkpoints/004000/training_state/rng_state.safetensors b/checkpoints/004000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2b985481ea68dc76a4dc4e2087dc55b0eb2ea697 --- /dev/null +++ b/checkpoints/004000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bf7141d6567e04eab16759b583f7fe2aabe076cc9babdc41c150cccec8e9f4f +size 15708 diff --git a/checkpoints/004000/training_state/scheduler_state.json b/checkpoints/004000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3b4732a3f05f5197879a13fa628a896a2aba5f50 --- /dev/null +++ b/checkpoints/004000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 4000, + "_step_count": 4001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.578534106007679e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/004000/training_state/training_step.json b/checkpoints/004000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..75d870521c191f77dd9eaa4d83486eab6e768f69 --- /dev/null +++ b/checkpoints/004000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 4000 +} \ No newline at end of file diff --git a/checkpoints/005000/pretrained_model/config.json b/checkpoints/005000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/005000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/005000/pretrained_model/model.safetensors b/checkpoints/005000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b630c7febf0416d0d8421786a0c5d14339572e7d --- /dev/null +++ b/checkpoints/005000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b33c7e481e92683dc295a1d630f3c6935f2f31444f0cf091e9e3c4ab16a43a +size 1197789224 diff --git a/checkpoints/005000/pretrained_model/policy_postprocessor.json b/checkpoints/005000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/005000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/005000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/005000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/005000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/005000/pretrained_model/policy_preprocessor.json b/checkpoints/005000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/005000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/005000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/005000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/005000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/005000/pretrained_model/train_config.json b/checkpoints/005000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/005000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/005000/training_state/optimizer_param_groups.json b/checkpoints/005000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..da3ff6fe3c49bde113b5e51b8f07a79834a00a55 --- /dev/null +++ b/checkpoints/005000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 9.34687384344914e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/005000/training_state/optimizer_state.safetensors b/checkpoints/005000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60b48a00af878ec29d655ffc90ecaaa1a2f8a377 --- /dev/null +++ b/checkpoints/005000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c60ad3a105d32e683cca20f9c08137e102e3c4bc01782dae77625db9b766228e +size 412659164 diff --git a/checkpoints/005000/training_state/rng_state.safetensors b/checkpoints/005000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d83aa268e8bbba16ece2a54285f240f442538bbf --- /dev/null +++ b/checkpoints/005000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f44bfb2ea15e9260b1f062989ef6959c142262d3d6b86afea6f91add197a184b +size 15708 diff --git a/checkpoints/005000/training_state/scheduler_state.json b/checkpoints/005000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..26e28372a197de2e43c4da4b379b2995d3c971c2 --- /dev/null +++ b/checkpoints/005000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 5000, + "_step_count": 5001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.34687384344914e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/005000/training_state/training_step.json b/checkpoints/005000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..592449d3fc8b35c30c604eb1dabe60537e8224a0 --- /dev/null +++ b/checkpoints/005000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 5000 +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/config.json b/checkpoints/006000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/006000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/model.safetensors b/checkpoints/006000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5c8101ed070a2c95fba3f9867f9f25ee9079aa7 --- /dev/null +++ b/checkpoints/006000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5175d32746b83065c8d58ca3dd9d5e43d7af5a64c5c856c2f9431f6fa462db47 +size 1197789224 diff --git a/checkpoints/006000/pretrained_model/policy_postprocessor.json b/checkpoints/006000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/006000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/006000/pretrained_model/policy_preprocessor.json b/checkpoints/006000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/006000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/006000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/006000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/006000/pretrained_model/train_config.json b/checkpoints/006000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/006000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/006000/training_state/optimizer_param_groups.json b/checkpoints/006000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..750f580a38652e26a746ee760bcc4c2ba96dc5cd --- /dev/null +++ b/checkpoints/006000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 9.06895784757787e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/006000/training_state/optimizer_state.safetensors b/checkpoints/006000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7edf69b20dd1b76333d6f7d7c83383c95e866daf --- /dev/null +++ b/checkpoints/006000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e64cd376690386a672193da7e235af214bab28aeceb06ea2aa22b0307908d41 +size 412659164 diff --git a/checkpoints/006000/training_state/rng_state.safetensors b/checkpoints/006000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a38f82f14208ab34553e3e3332ef2ff276d5d7b0 --- /dev/null +++ b/checkpoints/006000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:184d50651a6f6f28c58dbe6cc0d52637a4ed2efc4d627bb1cdfcc10d9998ffad +size 15708 diff --git a/checkpoints/006000/training_state/scheduler_state.json b/checkpoints/006000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3f05b7ee5cb0734b15073331f47ba88becf9c725 --- /dev/null +++ b/checkpoints/006000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 6000, + "_step_count": 6001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.06895784757787e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/006000/training_state/training_step.json b/checkpoints/006000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..e267ac589be64705f8674638b9f5099c886778da --- /dev/null +++ b/checkpoints/006000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 6000 +} \ No newline at end of file diff --git a/checkpoints/007000/pretrained_model/config.json b/checkpoints/007000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/007000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/007000/pretrained_model/model.safetensors b/checkpoints/007000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cf3071b87448e97e41d8fdd8124b7062eedbecc --- /dev/null +++ b/checkpoints/007000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7723c122fe53857fd4b51f67b14ff64864785d07d73b79ccde32974a8a596c +size 1197789224 diff --git a/checkpoints/007000/pretrained_model/policy_postprocessor.json b/checkpoints/007000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/007000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/007000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/007000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/007000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/007000/pretrained_model/policy_preprocessor.json b/checkpoints/007000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/007000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/007000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/007000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/007000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/007000/pretrained_model/train_config.json b/checkpoints/007000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/007000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/007000/training_state/optimizer_param_groups.json b/checkpoints/007000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..13a88a2a4fec71f2176b72f804e85c08df6cca70 --- /dev/null +++ b/checkpoints/007000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 8.747831024202298e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/007000/training_state/optimizer_state.safetensors b/checkpoints/007000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9e1705c540caa7272ac3efcc34e7885d8f29f74b --- /dev/null +++ b/checkpoints/007000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:766d4c40c35fc3b4b692a6fb503c56d4916d7cdfb9cee283e0152f086de33d7b +size 412659164 diff --git a/checkpoints/007000/training_state/rng_state.safetensors b/checkpoints/007000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ff2c2c996500df9c73cf783250976c0ec9b52ed1 --- /dev/null +++ b/checkpoints/007000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d6ea12ab91faec1ff5379094512c5da89e9ab9161a0b2e377f256aae75fe189 +size 15708 diff --git a/checkpoints/007000/training_state/scheduler_state.json b/checkpoints/007000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..50cea3cb04a51f67020efa18d6fbdbaf862696fe --- /dev/null +++ b/checkpoints/007000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 7000, + "_step_count": 7001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 8.747831024202298e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/007000/training_state/training_step.json b/checkpoints/007000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..1f4c730c4d88ece076ae9d4e01ed7fbe60b0415d --- /dev/null +++ b/checkpoints/007000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 7000 +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/config.json b/checkpoints/008000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/008000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/model.safetensors b/checkpoints/008000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bb22024aa2cf318e2d692ea7c53d9a5741a56b07 --- /dev/null +++ b/checkpoints/008000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d584e54df46fb8362bde06804aa7f03ea596b5203c39c01ea350e6b2bb3d33f +size 1197789224 diff --git a/checkpoints/008000/pretrained_model/policy_postprocessor.json b/checkpoints/008000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/008000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/008000/pretrained_model/policy_preprocessor.json b/checkpoints/008000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/008000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/008000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/008000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/008000/pretrained_model/train_config.json b/checkpoints/008000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/008000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/008000/training_state/optimizer_param_groups.json b/checkpoints/008000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..3fac7e15881142baccc6efec07a9b141a0968be3 --- /dev/null +++ b/checkpoints/008000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 8.387011705999434e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/008000/training_state/optimizer_state.safetensors b/checkpoints/008000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6d1eaae428d2c72f774ddb22e3cf6f8532afb8c --- /dev/null +++ b/checkpoints/008000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f000dcdc1f1c674afd649edc2b81dbe3296b2cd538100269b49cec267c8c043 +size 412659164 diff --git a/checkpoints/008000/training_state/rng_state.safetensors b/checkpoints/008000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e05cf31c790920160988e839e13bfbc2ca9efbf --- /dev/null +++ b/checkpoints/008000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58d987828e71d7e75fe2795548ae883179c7d1b7ad940975df59ece17d84438a +size 15708 diff --git a/checkpoints/008000/training_state/scheduler_state.json b/checkpoints/008000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e9e3f8e39b95a0781b46fcb209d81b782b515274 --- /dev/null +++ b/checkpoints/008000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 8000, + "_step_count": 8001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 8.387011705999434e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/008000/training_state/training_step.json b/checkpoints/008000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..74d8cba01cab8506617b2cbae6f268fe80fbfa79 --- /dev/null +++ b/checkpoints/008000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 8000 +} \ No newline at end of file diff --git a/checkpoints/009000/pretrained_model/config.json b/checkpoints/009000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/009000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/009000/pretrained_model/model.safetensors b/checkpoints/009000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..56f8c20386a13432df6ba2a54827090dc6135e2e --- /dev/null +++ b/checkpoints/009000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c82105d3bf29682df6a014eaa1c793a1794a608c830dc8513cea245d3979c67b +size 1197789224 diff --git a/checkpoints/009000/pretrained_model/policy_postprocessor.json b/checkpoints/009000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/009000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/009000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/009000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/009000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/009000/pretrained_model/policy_preprocessor.json b/checkpoints/009000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/009000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/009000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/009000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/009000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/009000/pretrained_model/train_config.json b/checkpoints/009000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/009000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/009000/training_state/optimizer_param_groups.json b/checkpoints/009000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..f8c743d5db8b7f147c4d955dfd3a08a802830512 --- /dev/null +++ b/checkpoints/009000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 7.990453104925807e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/009000/training_state/optimizer_state.safetensors b/checkpoints/009000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3a558905f0121fad49dd0df0bba7e88cdb0516c --- /dev/null +++ b/checkpoints/009000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43ece8c867ca782f95366a718f1879a4b85f7eb145993b991b9939bd3f8f7c99 +size 412659164 diff --git a/checkpoints/009000/training_state/rng_state.safetensors b/checkpoints/009000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0f35c5838735dc703366652f9611a554fd43f06c --- /dev/null +++ b/checkpoints/009000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c679ff6ef54ed7764a2be7b13acecd39fcdd3c3d21402099cf2775341b8c6a4 +size 15708 diff --git a/checkpoints/009000/training_state/scheduler_state.json b/checkpoints/009000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c398804d874e470783822a0e62c40412eb5cd096 --- /dev/null +++ b/checkpoints/009000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 9000, + "_step_count": 9001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 7.990453104925807e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/009000/training_state/training_step.json b/checkpoints/009000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..7247596175281e9d3f295d2b6936944a71b64d34 --- /dev/null +++ b/checkpoints/009000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 9000 +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/config.json b/checkpoints/010000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/010000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/model.safetensors b/checkpoints/010000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b4449e406f316f08b4c83b5d66c7726201569fc --- /dev/null +++ b/checkpoints/010000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1ebdb6d3b9eab2761187d1cbe872e1887994dde8da5daa65bd1b6f4341beb43 +size 1197789224 diff --git a/checkpoints/010000/pretrained_model/policy_postprocessor.json b/checkpoints/010000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/010000/pretrained_model/policy_preprocessor.json b/checkpoints/010000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/010000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/010000/pretrained_model/train_config.json b/checkpoints/010000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/010000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/010000/training_state/optimizer_param_groups.json b/checkpoints/010000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..1ee4c92efd08e25593632b371807c89fc1b8ecc2 --- /dev/null +++ b/checkpoints/010000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 7.5625e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/010000/training_state/optimizer_state.safetensors b/checkpoints/010000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d0625397a58920d21107ddffac3cb26cd978663 --- /dev/null +++ b/checkpoints/010000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70ecdae5030892b796b2c6d8c8bba6bd170ef243203f513a0066823aa2324d49 +size 412659164 diff --git a/checkpoints/010000/training_state/rng_state.safetensors b/checkpoints/010000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4bce1f185a8ed7966ac2bcbe6cd6e560ff2d835d --- /dev/null +++ b/checkpoints/010000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60d0edfa6c13ba617fc3245b7868c908195178ddccd92f2d5fbbf40ad9cf428f +size 15708 diff --git a/checkpoints/010000/training_state/scheduler_state.json b/checkpoints/010000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a6fabdcd1c2a4515b7818e55be9009a5f7ef1e --- /dev/null +++ b/checkpoints/010000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 10000, + "_step_count": 10001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 7.5625e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/010000/training_state/training_step.json b/checkpoints/010000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..7cb7c0986e9e7461ca851ce71e95d235ae3d2732 --- /dev/null +++ b/checkpoints/010000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 10000 +} \ No newline at end of file diff --git a/checkpoints/011000/pretrained_model/config.json b/checkpoints/011000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/011000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/011000/pretrained_model/model.safetensors b/checkpoints/011000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdcb1738ec72e04872c8cf41e6121c5bb67e5bbe --- /dev/null +++ b/checkpoints/011000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ecbf408e99b3baa91bfbfbc442ab182cc6d8c68c0c3100ec3f95e992ee56a8d0 +size 1197789224 diff --git a/checkpoints/011000/pretrained_model/policy_postprocessor.json b/checkpoints/011000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/011000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/011000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/011000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/011000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/011000/pretrained_model/policy_preprocessor.json b/checkpoints/011000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/011000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/011000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/011000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/011000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/011000/pretrained_model/train_config.json b/checkpoints/011000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/011000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/011000/training_state/optimizer_param_groups.json b/checkpoints/011000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..6367cb2cd92278b54bd0984c6f2a33ae6829b943 --- /dev/null +++ b/checkpoints/011000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 7.107841134994528e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/011000/training_state/optimizer_state.safetensors b/checkpoints/011000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41b63277a68e40da01d1f32d522d691d94a8f89e --- /dev/null +++ b/checkpoints/011000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd395ba64902565304a42c68eb7f3c67ac7b0ba3a06544eaec89330156915c48 +size 412659164 diff --git a/checkpoints/011000/training_state/rng_state.safetensors b/checkpoints/011000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bffda88a62bf81096db7a451506fe005062ca0d0 --- /dev/null +++ b/checkpoints/011000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac5ea940d4547d0659de31cdb9701f120e2a7b2a5f8f063677fd9dd931e33d7f +size 15708 diff --git a/checkpoints/011000/training_state/scheduler_state.json b/checkpoints/011000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ba964c0343329f42c3b3b289a93b249b5669bd9f --- /dev/null +++ b/checkpoints/011000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 11000, + "_step_count": 11001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 7.107841134994528e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/011000/training_state/training_step.json b/checkpoints/011000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..2389cf08c9a887d74ab54bbe783d6a4eccbbee72 --- /dev/null +++ b/checkpoints/011000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 11000 +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/config.json b/checkpoints/012000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/012000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/model.safetensors b/checkpoints/012000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3765d5dcdd25ac8399bf73583518753d3a4296da --- /dev/null +++ b/checkpoints/012000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:daf504c96e0db85a1ba5190806cf9d9c88e112e79f86b478a941aa9e520469b2 +size 1197789224 diff --git a/checkpoints/012000/pretrained_model/policy_postprocessor.json b/checkpoints/012000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/012000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/012000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/012000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/012000/pretrained_model/policy_preprocessor.json b/checkpoints/012000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/012000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/012000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/012000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/012000/pretrained_model/train_config.json b/checkpoints/012000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/012000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/012000/training_state/optimizer_param_groups.json b/checkpoints/012000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..bb2b9765a16cf0761fd1c871f16033b90c00716d --- /dev/null +++ b/checkpoints/012000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 6.631457847577869e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/012000/training_state/optimizer_state.safetensors b/checkpoints/012000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f3ab9a4be0f4f2c4a4d0a11d1c23f8c586818ef4 --- /dev/null +++ b/checkpoints/012000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f164670dd4ce82757af8757b45a7fdbcdf0483390412ca5c7feb0bbbca3b5c77 +size 412659164 diff --git a/checkpoints/012000/training_state/rng_state.safetensors b/checkpoints/012000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5eadf68a6f2d179a5815a47439143ef50e3f7554 --- /dev/null +++ b/checkpoints/012000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b18657071c0a00235a0a416df65f88c47691b0c30e2ebd582b503f09898a0c5 +size 15708 diff --git a/checkpoints/012000/training_state/scheduler_state.json b/checkpoints/012000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eceff7e52221004e23f6f617754e8849a67eeec0 --- /dev/null +++ b/checkpoints/012000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 12000, + "_step_count": 12001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 6.631457847577869e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/012000/training_state/training_step.json b/checkpoints/012000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..c4fb27ac819b81943e6545c7c18510bdfb8eae1b --- /dev/null +++ b/checkpoints/012000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 12000 +} \ No newline at end of file diff --git a/checkpoints/013000/pretrained_model/config.json b/checkpoints/013000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/013000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/013000/pretrained_model/model.safetensors b/checkpoints/013000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43634234eb6b6a397e0946f70a16d9b648bc8ec1 --- /dev/null +++ b/checkpoints/013000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d56e396b4366761544b1d2a0ab42f653d13dbe3d3f808213ed6e9de22d2e45a3 +size 1197789224 diff --git a/checkpoints/013000/pretrained_model/policy_postprocessor.json b/checkpoints/013000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/013000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/013000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/013000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/013000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/013000/pretrained_model/policy_preprocessor.json b/checkpoints/013000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/013000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/013000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/013000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/013000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/013000/pretrained_model/train_config.json b/checkpoints/013000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/013000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/013000/training_state/optimizer_param_groups.json b/checkpoints/013000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..eb2141a5ea958150757b2e20abfdc1a267e9ea94 --- /dev/null +++ b/checkpoints/013000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 6.138569492736577e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/013000/training_state/optimizer_state.safetensors b/checkpoints/013000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d4a8983226770bc0e4be9286e665cdb7a4bbfb0c --- /dev/null +++ b/checkpoints/013000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8d8794d670d520d28462dc9342896ed0f344fdd154807363f5c3a751eff9c16a +size 412659164 diff --git a/checkpoints/013000/training_state/rng_state.safetensors b/checkpoints/013000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5448fe9e8de90ffbad10cf8d4fea3f6139dcce1 --- /dev/null +++ b/checkpoints/013000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bfe1edee024933c932d32479caa68f5b00063ce34489264a2f9cb32fe96b8c7 +size 15708 diff --git a/checkpoints/013000/training_state/scheduler_state.json b/checkpoints/013000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..913991b294e49857dd2bfa21d6fd23e64f25558c --- /dev/null +++ b/checkpoints/013000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 13000, + "_step_count": 13001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 6.138569492736577e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/013000/training_state/training_step.json b/checkpoints/013000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..6f5915e7fb38c851f5b48d58c1f66fe9c9d147ed --- /dev/null +++ b/checkpoints/013000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 13000 +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/config.json b/checkpoints/014000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/014000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/model.safetensors b/checkpoints/014000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab38426fea1bc4b90b7540f44bea05efb10bcade --- /dev/null +++ b/checkpoints/014000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dd78975df803df26a41ec142607534ef459c95d5f9a709118dc9add7a1908ff +size 1197789224 diff --git a/checkpoints/014000/pretrained_model/policy_postprocessor.json b/checkpoints/014000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/014000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/014000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/014000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/014000/pretrained_model/policy_preprocessor.json b/checkpoints/014000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/014000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/014000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/014000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/014000/pretrained_model/train_config.json b/checkpoints/014000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/014000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/014000/training_state/optimizer_param_groups.json b/checkpoints/014000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..d3796a6550f12d8f2537a7c9b328882a26a6358b --- /dev/null +++ b/checkpoints/014000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 5.634576258429812e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/014000/training_state/optimizer_state.safetensors b/checkpoints/014000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e6e75f2a8bef2556ab6c8895a62eeb0e815951f --- /dev/null +++ b/checkpoints/014000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea8256f26e7db0e2595b71328402186b8389acbcbe99ceb8e2c3c4f101602612 +size 412659164 diff --git a/checkpoints/014000/training_state/rng_state.safetensors b/checkpoints/014000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71dc3b14b3512b6c1a786823b624ebb147173017 --- /dev/null +++ b/checkpoints/014000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ce626c0bd422926390aa8a2c472e748472793153647f8e53f3c62f48a84de01 +size 15708 diff --git a/checkpoints/014000/training_state/scheduler_state.json b/checkpoints/014000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..610b30cf852264cfefc11a467be2a01d721d7d68 --- /dev/null +++ b/checkpoints/014000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 14000, + "_step_count": 14001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 5.634576258429812e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/014000/training_state/training_step.json b/checkpoints/014000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..560b82990f5c806d547ed82a5432ba60aec285e1 --- /dev/null +++ b/checkpoints/014000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 14000 +} \ No newline at end of file diff --git a/checkpoints/015000/pretrained_model/config.json b/checkpoints/015000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/015000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/015000/pretrained_model/model.safetensors b/checkpoints/015000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3f8e6873005d8fa2f7e18a6a5daff21f9c70b056 --- /dev/null +++ b/checkpoints/015000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1942230fe217d7e7305709439850fe523bd5b19290e5d3ff8aa1008a887299e9 +size 1197789224 diff --git a/checkpoints/015000/pretrained_model/policy_postprocessor.json b/checkpoints/015000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/015000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/015000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/015000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/015000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/015000/pretrained_model/policy_preprocessor.json b/checkpoints/015000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/015000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/015000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/015000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/015000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/015000/pretrained_model/train_config.json b/checkpoints/015000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/015000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/015000/training_state/optimizer_param_groups.json b/checkpoints/015000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..9277f6533d9b54979939a62759a6fbdec0e5ac6c --- /dev/null +++ b/checkpoints/015000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 5.125e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/015000/training_state/optimizer_state.safetensors b/checkpoints/015000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da87de77b964ca6a33cc06faff4f6f0d9e544a81 --- /dev/null +++ b/checkpoints/015000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce0646caac2b6734f1c45489d8a25023cb68165deffd176da23a9c4dfd85f01d +size 412659164 diff --git a/checkpoints/015000/training_state/rng_state.safetensors b/checkpoints/015000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b18a477a113e987f8e5a48a690e7960c4929d66b --- /dev/null +++ b/checkpoints/015000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52e1b508bbd8e3fe1c1d377de34b8a1883af43f464345e9c73888cd63b781d58 +size 15708 diff --git a/checkpoints/015000/training_state/scheduler_state.json b/checkpoints/015000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ba6b6927e93a4428e4458c8496e3590dcbbac247 --- /dev/null +++ b/checkpoints/015000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 15000, + "_step_count": 15001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 5.125e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/015000/training_state/training_step.json b/checkpoints/015000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..5cec056c8ba7f3c2e865a0f73ae59975a0503067 --- /dev/null +++ b/checkpoints/015000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 15000 +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/config.json b/checkpoints/016000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/016000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/model.safetensors b/checkpoints/016000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..161e2305cd4a56c5cdf3f81d1c2d31b590196446 --- /dev/null +++ b/checkpoints/016000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:21f7328f56f7bb2b36bbaf3f74299e6e3f00172786ed7cb60ad2eb4117baaa17 +size 1197789224 diff --git a/checkpoints/016000/pretrained_model/policy_postprocessor.json b/checkpoints/016000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/016000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/016000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/016000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/016000/pretrained_model/policy_preprocessor.json b/checkpoints/016000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/016000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/016000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/016000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/016000/pretrained_model/train_config.json b/checkpoints/016000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/016000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/016000/training_state/optimizer_param_groups.json b/checkpoints/016000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..86b52ec4cc86876f21b08f79ee06112b2092858c --- /dev/null +++ b/checkpoints/016000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 4.6154237415701904e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/016000/training_state/optimizer_state.safetensors b/checkpoints/016000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..517dc54d6befef43fa13e2ec0e2f680b14d5e231 --- /dev/null +++ b/checkpoints/016000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42d0493e0407168494858ee235c21443a7898be488d81a7849a72576c8477b0d +size 412659164 diff --git a/checkpoints/016000/training_state/rng_state.safetensors b/checkpoints/016000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7d9b73df1357f6b90ad053f4cdb347bd8a2f4de4 --- /dev/null +++ b/checkpoints/016000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a6c05379f2d4fb15c0c188c05cf57e9112790d3c58c36f5b7a23cf35c2b6752 +size 15708 diff --git a/checkpoints/016000/training_state/scheduler_state.json b/checkpoints/016000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9e4a80bbf6ff7dc87e4f7db21e884cdf5d7f2964 --- /dev/null +++ b/checkpoints/016000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 16000, + "_step_count": 16001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 4.6154237415701904e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/016000/training_state/training_step.json b/checkpoints/016000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..798020d90d4c39414dfab88708ecefd52f2e9ac5 --- /dev/null +++ b/checkpoints/016000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 16000 +} \ No newline at end of file diff --git a/checkpoints/017000/pretrained_model/config.json b/checkpoints/017000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/017000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/017000/pretrained_model/model.safetensors b/checkpoints/017000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5264fd1508c0c61a05400d65789b3a2e4ecb7331 --- /dev/null +++ b/checkpoints/017000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc8f7b41203642acf2953a341d1b16e6d3b6d6948bd75c93d68f8e326b1d0d91 +size 1197789224 diff --git a/checkpoints/017000/pretrained_model/policy_postprocessor.json b/checkpoints/017000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/017000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/017000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/017000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/017000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/017000/pretrained_model/policy_preprocessor.json b/checkpoints/017000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/017000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/017000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/017000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/017000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/017000/pretrained_model/train_config.json b/checkpoints/017000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/017000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/017000/training_state/optimizer_param_groups.json b/checkpoints/017000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..6fd4e01b9e1783084eaace3befc1810cf2375dcd --- /dev/null +++ b/checkpoints/017000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 4.1114305072634235e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/017000/training_state/optimizer_state.safetensors b/checkpoints/017000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..556d0678f77877cc4746eda6751daa9260a542bd --- /dev/null +++ b/checkpoints/017000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33c292ec730ed540a4bf1377ba9f21da79339fe76268ce2047d099a06af81bcc +size 412659164 diff --git a/checkpoints/017000/training_state/rng_state.safetensors b/checkpoints/017000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..afab91e0839f6a89d6d74e19cbed186cae674f87 --- /dev/null +++ b/checkpoints/017000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bc8f441fa0720e8668c19d504247b786a452f70b337f6f541cb7e895f917e45 +size 15708 diff --git a/checkpoints/017000/training_state/scheduler_state.json b/checkpoints/017000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ed9527c0a1e4f2fa3dcddb90c015bc3cf8100249 --- /dev/null +++ b/checkpoints/017000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 17000, + "_step_count": 17001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 4.1114305072634235e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/017000/training_state/training_step.json b/checkpoints/017000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..c397907165bb49af5c8ed9f0802e1ca19fad31fb --- /dev/null +++ b/checkpoints/017000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 17000 +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/config.json b/checkpoints/018000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..639d90e191696dca004767d13cde1a74ab5d2bda --- /dev/null +++ b/checkpoints/018000/pretrained_model/config.json @@ -0,0 +1,117 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/model.safetensors b/checkpoints/018000/pretrained_model/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0c737c11e613e9f68604bd3e5160a4d490b233b --- /dev/null +++ b/checkpoints/018000/pretrained_model/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e13bb1f734ae6e589a5fba03d5f6bbeb15746fa1db0c3db285ee3cf5cc5d9b85 +size 1197789224 diff --git a/checkpoints/018000/pretrained_model/policy_postprocessor.json b/checkpoints/018000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/018000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/018000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/018000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/018000/pretrained_model/policy_preprocessor.json b/checkpoints/018000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..0f385ae81acb5b0984ccff867bbd11476774c619 --- /dev/null +++ b/checkpoints/018000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,105 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/018000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..14dd13cd559dad4e62ac7c001d33e55154070e82 --- /dev/null +++ b/checkpoints/018000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:287442861d5b2ab7b6af78b122213f68e9ed4b12d0cc56bc6d6a7dd92c322ca0 +size 6608 diff --git a/checkpoints/018000/pretrained_model/train_config.json b/checkpoints/018000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..99af8d54049e803beeb61d05b52cf21bb02ec4ea --- /dev/null +++ b/checkpoints/018000/pretrained_model/train_config.json @@ -0,0 +1,213 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval1_fixed_merged_remap", + "root": "/dev/shm/lerobot_datasets/eval1_fixed_merged_remap", + "episodes": null, + "image_transforms": { + "enable": true, + "max_num_transforms": 2, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "return_uint8": false, + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.empty_camera_0": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + }, + "observation.images.empty_camera_1": { + "type": "VISUAL", + "shape": [ + 3, + 480, + 640 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": false, + "push_to_hub": true, + "repo_id": "robot-learning-group47/eval1_fixed_merged_from_base_lightingonly", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "lerobot/smolvla_base", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 256, + 256 + ], + "empty_cameras": 2, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": false, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": -1, + "pad_language_to": "longest", + "num_expert_layers": -1, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "reward_model": null, + "output_dir": "outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "job_name": "eval1_fixed_merged_from_base_lightingonly_20260518_165453", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 8, + "batch_size": 64, + "prefetch_factor": 2, + "persistent_workers": true, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 20, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 1000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 8, + "use_async_envs": true + }, + "wandb": { + "enable": true, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": "vjy2hgq7", + "mode": null, + "add_tags": true + }, + "peft": null, + "sample_weighting": null, + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/018000/training_state/optimizer_param_groups.json b/checkpoints/018000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..9646eb8ac6dd31c190f0b6b103380fcd55835c06 --- /dev/null +++ b/checkpoints/018000/training_state/optimizer_param_groups.json @@ -0,0 +1,521 @@ +[ + { + "lr": 3.618542152422132e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499 + ] + } +] \ No newline at end of file diff --git a/checkpoints/018000/training_state/optimizer_state.safetensors b/checkpoints/018000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3372746468311f65e3e57ded05f2ecf8887fe859 --- /dev/null +++ b/checkpoints/018000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc0f607828b198347149deea026bd278f90b3b1e783bddf22fabad0908652181 +size 412659164 diff --git a/checkpoints/018000/training_state/rng_state.safetensors b/checkpoints/018000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55d788f5a3301d81b366fa21ccd6df4d89ca8c7b --- /dev/null +++ b/checkpoints/018000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bcb5f18ffca83201c4fe82114b570253c89b56b51bf6b5d5ff3dc67d5830370 +size 15708 diff --git a/checkpoints/018000/training_state/scheduler_state.json b/checkpoints/018000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9be428b0008fceed41d068253231f415d4be6678 --- /dev/null +++ b/checkpoints/018000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 18000, + "_step_count": 18001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 3.618542152422132e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/018000/training_state/training_step.json b/checkpoints/018000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..332a3c17a399c305da7a515c33ffb382d0aff339 --- /dev/null +++ b/checkpoints/018000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 18000 +} \ No newline at end of file diff --git a/wandb/debug-internal.log b/wandb/debug-internal.log new file mode 100644 index 0000000000000000000000000000000000000000..a34d71ba19b49f30920a055c7e672615d874dc10 --- /dev/null +++ b/wandb/debug-internal.log @@ -0,0 +1,6 @@ +{"time":"2026-05-18T16:55:01.314305568Z","level":"INFO","msg":"stream: starting","core version":"0.24.2"} +{"time":"2026-05-18T16:55:01.61689597Z","level":"INFO","msg":"stream: created new stream","id":"vjy2hgq7"} +{"time":"2026-05-18T16:55:01.617008751Z","level":"INFO","msg":"handler: started","stream_id":"vjy2hgq7"} +{"time":"2026-05-18T16:55:01.617395147Z","level":"INFO","msg":"stream: started","id":"vjy2hgq7"} +{"time":"2026-05-18T16:55:01.617431898Z","level":"INFO","msg":"sender: started","stream_id":"vjy2hgq7"} +{"time":"2026-05-18T16:55:01.617428308Z","level":"INFO","msg":"writer: started","stream_id":"vjy2hgq7"} diff --git a/wandb/debug.log b/wandb/debug.log new file mode 100644 index 0000000000000000000000000000000000000000..e2780011629db2e9de81fbd9d6c282d08c0c4a3f --- /dev/null +++ b/wandb/debug.log @@ -0,0 +1,19 @@ +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_setup.py:_flush():81] Current SDK version is 0.24.2 +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_setup.py:_flush():81] Configure stats pid to 44311 +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_setup.py:_flush():81] Loading settings from environment variables +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_init.py:setup_run_log_directory():717] Logging user logs to outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453/wandb/run-20260518_165501-vjy2hgq7/logs/debug.log +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_init.py:setup_run_log_directory():718] Logging internal logs to outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453/wandb/run-20260518_165501-vjy2hgq7/logs/debug-internal.log +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_init.py:init():844] calling init triggers +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_init.py:init():849] wandb.init called with sweep_config: {} +config: {'dataset': {'repo_id': 'robot-learning-group47/eval1_fixed_merged_remap', 'root': '/dev/shm/lerobot_datasets/eval1_fixed_merged_remap', 'episodes': None, 'image_transforms': {'enable': True, 'max_num_transforms': 2, 'random_order': False, 'tfs': {'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec', 'return_uint8': False, 'streaming': False}, 'env': None, 'policy': {'type': 'smolvla', 'n_obs_steps': 1, 'input_features': {'observation.state': {'type': , 'shape': [6]}, 'observation.images.camera1': {'type': , 'shape': [3, 256, 256]}, 'observation.images.camera2': {'type': , 'shape': [3, 256, 256]}, 'observation.images.camera3': {'type': , 'shape': [3, 256, 256]}}, 'output_features': {'action': {'type': , 'shape': [6]}}, 'device': 'cuda', 'use_amp': False, 'use_peft': False, 'push_to_hub': True, 'repo_id': 'robot-learning-group47/eval1_fixed_merged_from_base_lightingonly', 'private': None, 'tags': None, 'license': None, 'pretrained_path': 'lerobot/smolvla_base', 'chunk_size': 50, 'n_action_steps': 50, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [256, 256], 'empty_cameras': 2, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': True, 'train_state_proj': True, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-10, 'optimizer_grad_clip_norm': 10.0, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 30000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': 'HuggingFaceTB/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': False, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': -1, 'pad_language_to': 'longest', 'num_expert_layers': -1, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0, 'rtc_config': None, 'compile_model': False, 'compile_mode': 'max-autotune'}, 'reward_model': None, 'output_dir': 'outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453', 'job_name': 'eval1_fixed_merged_from_base_lightingonly_20260518_165453', 'resume': False, 'seed': 1000, 'cudnn_deterministic': False, 'num_workers': 8, 'batch_size': 64, 'prefetch_factor': 2, 'persistent_workers': True, 'steps': 30000, 'eval_freq': 20000, 'log_freq': 20, 'tolerance_s': 0.0001, 'save_checkpoint': True, 'save_freq': 1000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 0.0001, 'weight_decay': 1e-10, 'grad_clip_norm': 10.0, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 30000, 'peak_lr': 0.0001, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 8, 'use_async_envs': True}, 'wandb': {'enable': True, 'disable_artifact': False, 'project': 'lerobot', 'entity': None, 'notes': None, 'run_id': None, 'mode': None, 'add_tags': True}, 'peft': None, 'sample_weighting': None, 'rename_map': {'observation.images.front': 'observation.images.camera1'}, 'checkpoint_path': None, '_wandb': {}} +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_init.py:init():892] starting backend +2026-05-18 16:55:01,309 INFO MainThread:44311 [wandb_init.py:init():895] sending inform_init request +2026-05-18 16:55:01,312 INFO MainThread:44311 [wandb_init.py:init():903] backend started and connected +2026-05-18 16:55:01,314 INFO MainThread:44311 [wandb_init.py:init():973] updated telemetry +2026-05-18 16:55:01,317 INFO MainThread:44311 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-05-18 16:55:02,162 INFO MainThread:44311 [wandb_init.py:init():1042] starting run threads in backend +2026-05-18 16:55:02,215 INFO MainThread:44311 [wandb_run.py:_console_start():2529] atexit reg +2026-05-18 16:55:02,215 INFO MainThread:44311 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-05-18 16:55:02,215 INFO MainThread:44311 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-05-18 16:55:02,215 INFO MainThread:44311 [wandb_run.py:_redirect():2469] Redirects installed. +2026-05-18 16:55:02,218 INFO MainThread:44311 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/wandb/run-20260518_165501-vjy2hgq7/files/output.log b/wandb/run-20260518_165501-vjy2hgq7/files/output.log new file mode 100644 index 0000000000000000000000000000000000000000..6fefee86974403fd24024c01ce690f5db9ef6017 --- /dev/null +++ b/wandb/run-20260518_165501-vjy2hgq7/files/output.log @@ -0,0 +1,973 @@ +INFO 2026-05-18 16:55:02 db_utils.py:121 Logs will be synced with wandb. +INFO 2026-05-18 16:55:02 db_utils.py:122 Track this run --> https://wandb.ai/nagyungecole-eth-z-rich/lerobot/runs/vjy2hgq7 +INFO 2026-05-18 16:55:02 ot_train.py:236 Creating dataset +INFO 2026-05-18 16:55:02 ot_train.py:270 Creating policy +Reducing the number of VLM layers to 16 ... +INFO 2026-05-18 16:55:13 ot_train.py:347 Creating optimizer and scheduler +INFO 2026-05-18 16:55:13 ot_train.py:374 Output dir: outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453 +INFO 2026-05-18 16:55:13 ot_train.py:381 cfg.steps=30000 (30K) +INFO 2026-05-18 16:55:13 ot_train.py:382 dataset.num_frames=16740 (17K) +INFO 2026-05-18 16:55:13 ot_train.py:383 dataset.num_episodes=93 +INFO 2026-05-18 16:55:13 ot_train.py:386 Effective batch size: 64 x 1 = 64 +INFO 2026-05-18 16:55:13 ot_train.py:387 num_learnable_params=99880992 (100M) +INFO 2026-05-18 16:55:13 ot_train.py:388 num_total_params=450046176 (450M) +Training: 0%| | 0/30000 [00:00, 'shape': [6]}, 'observation.images.camera1': {'type': , 'shape': [3, 256, 256]}, 'observation.images.camera2': {'type': , 'shape': [3, 256, 256]}, 'observation.images.camera3': {'type': , 'shape': [3, 256, 256]}}, 'output_features': {'action': {'type': , 'shape': [6]}}, 'device': 'cuda', 'use_amp': False, 'use_peft': False, 'push_to_hub': True, 'repo_id': 'robot-learning-group47/eval1_fixed_merged_from_base_lightingonly', 'private': None, 'tags': None, 'license': None, 'pretrained_path': 'lerobot/smolvla_base', 'chunk_size': 50, 'n_action_steps': 50, 'normalization_mapping': {'VISUAL': , 'STATE': , 'ACTION': }, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [256, 256], 'empty_cameras': 2, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': True, 'train_state_proj': True, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-10, 'optimizer_grad_clip_norm': 10.0, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 30000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': 'HuggingFaceTB/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': False, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': -1, 'pad_language_to': 'longest', 'num_expert_layers': -1, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0, 'rtc_config': None, 'compile_model': False, 'compile_mode': 'max-autotune'}, 'reward_model': None, 'output_dir': 'outputs/train/eval1_fixed_merged_from_base_lightingonly_20260518_165453', 'job_name': 'eval1_fixed_merged_from_base_lightingonly_20260518_165453', 'resume': False, 'seed': 1000, 'cudnn_deterministic': False, 'num_workers': 8, 'batch_size': 64, 'prefetch_factor': 2, 'persistent_workers': True, 'steps': 30000, 'eval_freq': 20000, 'log_freq': 20, 'tolerance_s': 0.0001, 'save_checkpoint': True, 'save_freq': 1000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 0.0001, 'weight_decay': 1e-10, 'grad_clip_norm': 10.0, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 30000, 'peak_lr': 0.0001, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 8, 'use_async_envs': True}, 'wandb': {'enable': True, 'disable_artifact': False, 'project': 'lerobot', 'entity': None, 'notes': None, 'run_id': None, 'mode': None, 'add_tags': True}, 'peft': None, 'sample_weighting': None, 'rename_map': {'observation.images.front': 'observation.images.camera1'}, 'checkpoint_path': None, '_wandb': {}} +2026-05-18 16:55:01,094 INFO MainThread:44311 [wandb_init.py:init():892] starting backend +2026-05-18 16:55:01,309 INFO MainThread:44311 [wandb_init.py:init():895] sending inform_init request +2026-05-18 16:55:01,312 INFO MainThread:44311 [wandb_init.py:init():903] backend started and connected +2026-05-18 16:55:01,314 INFO MainThread:44311 [wandb_init.py:init():973] updated telemetry +2026-05-18 16:55:01,317 INFO MainThread:44311 [wandb_init.py:init():997] communicating run to backend with 90.0 second timeout +2026-05-18 16:55:02,162 INFO MainThread:44311 [wandb_init.py:init():1042] starting run threads in backend +2026-05-18 16:55:02,215 INFO MainThread:44311 [wandb_run.py:_console_start():2529] atexit reg +2026-05-18 16:55:02,215 INFO MainThread:44311 [wandb_run.py:_redirect():2377] redirect: wrap_raw +2026-05-18 16:55:02,215 INFO MainThread:44311 [wandb_run.py:_redirect():2446] Wrapping output streams. +2026-05-18 16:55:02,215 INFO MainThread:44311 [wandb_run.py:_redirect():2469] Redirects installed. +2026-05-18 16:55:02,218 INFO MainThread:44311 [wandb_init.py:init():1082] run started, returning control to user process diff --git a/wandb/run-20260518_165501-vjy2hgq7/run-vjy2hgq7.wandb b/wandb/run-20260518_165501-vjy2hgq7/run-vjy2hgq7.wandb new file mode 100644 index 0000000000000000000000000000000000000000..b602ad395f538fc2b26720e1bf987065cce05cfc --- /dev/null +++ b/wandb/run-20260518_165501-vjy2hgq7/run-vjy2hgq7.wandb @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2da1152ec7d867ccc1e1a11450bc8724fb1fb9757c662d988ddfd953e3a393 +size 5079040