diff --git a/checkpoints/002000/pretrained_model/README.md b/checkpoints/002000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/002000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/adapter_config.json b/checkpoints/002000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/002000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/adapter_model.safetensors b/checkpoints/002000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0537f2efbcaf6375c8facf6f7308c85f4ab2df99 --- /dev/null +++ b/checkpoints/002000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2342ba3060294c0718b2b2e1050df2cf1122a611542921b0673773531619b6f +size 32895960 diff --git a/checkpoints/002000/pretrained_model/config.json b/checkpoints/002000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/002000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/policy_postprocessor.json b/checkpoints/002000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/002000/pretrained_model/policy_preprocessor.json b/checkpoints/002000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/002000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/002000/pretrained_model/train_config.json b/checkpoints/002000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/002000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/002000/training_state/optimizer_param_groups.json b/checkpoints/002000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..32bae394ef863ace6d62f40e2393f5714a1c75e6 --- /dev/null +++ b/checkpoints/002000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 9.893469553577303e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/002000/training_state/optimizer_state.safetensors b/checkpoints/002000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4774aa0a8795920fc772e6156c497039695c7624 --- /dev/null +++ b/checkpoints/002000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:32f1b5e9d7c04682a908c709217ef5fc4b7c03776859b0975af8fc2e8858d0f8 +size 64794928 diff --git a/checkpoints/002000/training_state/rng_state.safetensors b/checkpoints/002000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ababa8d8ceff12fbec0870c1cfaa6b962ecc60f --- /dev/null +++ b/checkpoints/002000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f0dbf1afbf99f75bc960c5734cd621f966263c389cb625edcafa4c0dc2786d4 +size 15708 diff --git a/checkpoints/002000/training_state/scheduler_state.json b/checkpoints/002000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..1c7653e306023b5aa9a3823ac7b9f430e5116428 --- /dev/null +++ b/checkpoints/002000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 2000, + "_step_count": 2001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.893469553577303e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/002000/training_state/training_step.json b/checkpoints/002000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..39090bbb986edb821e1602990d19357dcdb5d2ae --- /dev/null +++ b/checkpoints/002000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 2000 +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/README.md b/checkpoints/004000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/004000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/adapter_config.json b/checkpoints/004000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/004000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/adapter_model.safetensors b/checkpoints/004000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b2329a20b245c4ad34e08cf2c61575bf37fcbbc --- /dev/null +++ b/checkpoints/004000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff87eb4fc46261ac1d53d5bf774d2f1e914dc6bbec4fd0a4840e05b2f9c9e137 +size 32895960 diff --git a/checkpoints/004000/pretrained_model/config.json b/checkpoints/004000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/004000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/policy_postprocessor.json b/checkpoints/004000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/004000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/004000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/004000/pretrained_model/policy_preprocessor.json b/checkpoints/004000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/004000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/004000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/004000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/004000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/004000/pretrained_model/train_config.json b/checkpoints/004000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/004000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/004000/training_state/optimizer_param_groups.json b/checkpoints/004000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..66ed112dc078f2531e30ee1206c545b3034b27f3 --- /dev/null +++ b/checkpoints/004000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 9.578534106007679e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/004000/training_state/optimizer_state.safetensors b/checkpoints/004000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e9ce26174c667f582f8758e15199cbd4195ea84 --- /dev/null +++ b/checkpoints/004000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26ffcba2c886845de2e9272edb66f627bb589bf6140a94140ae53cb92dc8ae8 +size 64794928 diff --git a/checkpoints/004000/training_state/rng_state.safetensors b/checkpoints/004000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffa0b3f569b9edbffe47e0e22f96d1dc3e7dd1a3 --- /dev/null +++ b/checkpoints/004000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529ef0c04ddf7db3007558121ff090200139ef9018571bf3ff4de5d1a486ef97 +size 15708 diff --git a/checkpoints/004000/training_state/scheduler_state.json b/checkpoints/004000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3b4732a3f05f5197879a13fa628a896a2aba5f50 --- /dev/null +++ b/checkpoints/004000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 4000, + "_step_count": 4001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.578534106007679e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/004000/training_state/training_step.json b/checkpoints/004000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..75d870521c191f77dd9eaa4d83486eab6e768f69 --- /dev/null +++ b/checkpoints/004000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 4000 +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/README.md b/checkpoints/006000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/006000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/adapter_config.json b/checkpoints/006000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/006000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/adapter_model.safetensors b/checkpoints/006000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12fc8eea057069d904720b8d13a482c85fb2e52e --- /dev/null +++ b/checkpoints/006000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8651d68801e678b37ad3a40a1916c6010b17541916526f6ea5e3698d2474101b +size 32895960 diff --git a/checkpoints/006000/pretrained_model/config.json b/checkpoints/006000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/006000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/policy_postprocessor.json b/checkpoints/006000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/006000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/006000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/006000/pretrained_model/policy_preprocessor.json b/checkpoints/006000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/006000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/006000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/006000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/006000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/006000/pretrained_model/train_config.json b/checkpoints/006000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/006000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/006000/training_state/optimizer_param_groups.json b/checkpoints/006000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..f1770230fe82dd05c43134080693086cad7a53a2 --- /dev/null +++ b/checkpoints/006000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 9.06895784757787e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/006000/training_state/optimizer_state.safetensors b/checkpoints/006000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b28adaffc4d8f54ac3fb69cc9a23eef0032f21a --- /dev/null +++ b/checkpoints/006000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed399ff0680247088adcd4bbd32bb52119c4bf8579336e31be71a7bd09b6fada +size 64794928 diff --git a/checkpoints/006000/training_state/rng_state.safetensors b/checkpoints/006000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eb06f4482f3077eeed706a61eab98311a918aa53 --- /dev/null +++ b/checkpoints/006000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e504b1cadc949107457f41af8ce5fae2e67acf7990a803afcf949b8809ceb7be +size 15708 diff --git a/checkpoints/006000/training_state/scheduler_state.json b/checkpoints/006000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3f05b7ee5cb0734b15073331f47ba88becf9c725 --- /dev/null +++ b/checkpoints/006000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 6000, + "_step_count": 6001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 9.06895784757787e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/006000/training_state/training_step.json b/checkpoints/006000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..e267ac589be64705f8674638b9f5099c886778da --- /dev/null +++ b/checkpoints/006000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 6000 +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/README.md b/checkpoints/008000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/008000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/adapter_config.json b/checkpoints/008000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/008000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/adapter_model.safetensors b/checkpoints/008000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f494b98cad29000d83da6c94c7cccb733c69851 --- /dev/null +++ b/checkpoints/008000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07cf9e77a3abf1e190a1c5063d685b1318616da745e382e897e43e7598f22404 +size 32895960 diff --git a/checkpoints/008000/pretrained_model/config.json b/checkpoints/008000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/008000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/policy_postprocessor.json b/checkpoints/008000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/008000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/008000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/008000/pretrained_model/policy_preprocessor.json b/checkpoints/008000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/008000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/008000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/008000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/008000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/008000/pretrained_model/train_config.json b/checkpoints/008000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/008000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/008000/training_state/optimizer_param_groups.json b/checkpoints/008000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..8e0a4ba2ad1d8990f9be5171e6de39b983646c70 --- /dev/null +++ b/checkpoints/008000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 8.387011705999434e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/008000/training_state/optimizer_state.safetensors b/checkpoints/008000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..34c0b86998d9ad2eeba67f0371ee2893e361f7ca --- /dev/null +++ b/checkpoints/008000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4edab058d9799d79027e056d027bdad872af6e75cfb43cb57a5698ced0a1f523 +size 64794928 diff --git a/checkpoints/008000/training_state/rng_state.safetensors b/checkpoints/008000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1619da0045e43de5b9dcb500c9888ce01239916e --- /dev/null +++ b/checkpoints/008000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bcda426906cda0e88145ce7117458f27585723496a8386187cb62ac8425c468 +size 15708 diff --git a/checkpoints/008000/training_state/scheduler_state.json b/checkpoints/008000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e9e3f8e39b95a0781b46fcb209d81b782b515274 --- /dev/null +++ b/checkpoints/008000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 8000, + "_step_count": 8001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 8.387011705999434e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/008000/training_state/training_step.json b/checkpoints/008000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..74d8cba01cab8506617b2cbae6f268fe80fbfa79 --- /dev/null +++ b/checkpoints/008000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 8000 +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/README.md b/checkpoints/010000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/010000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/adapter_config.json b/checkpoints/010000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/010000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/adapter_model.safetensors b/checkpoints/010000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..987678af00ddfd1428a6fb4c84f3bfb28b932802 --- /dev/null +++ b/checkpoints/010000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b82d5d715d2f4d60ec8b3584fcd0ce03364825317be632043bf65a524ad038e +size 32895960 diff --git a/checkpoints/010000/pretrained_model/config.json b/checkpoints/010000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/010000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/policy_postprocessor.json b/checkpoints/010000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/010000/pretrained_model/policy_preprocessor.json b/checkpoints/010000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/010000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/010000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/010000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/010000/pretrained_model/train_config.json b/checkpoints/010000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/010000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/010000/training_state/optimizer_param_groups.json b/checkpoints/010000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..a0c6ae3f8e42d19490dac7e04a1e5c593c3ec06d --- /dev/null +++ b/checkpoints/010000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 7.5625e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/010000/training_state/optimizer_state.safetensors b/checkpoints/010000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..095a9aa295a07044219e3cf66320dcdc5cdd1331 --- /dev/null +++ b/checkpoints/010000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:80fd86411c525677fb3462e0cda24cbc12e8400513d90696282cc0ded74a78ed +size 64794928 diff --git a/checkpoints/010000/training_state/rng_state.safetensors b/checkpoints/010000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1146a6c474a22853ce9b20e890635b05e836e1c0 --- /dev/null +++ b/checkpoints/010000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ca8eb5d2ad88c9815ff62cf606af25558ec81fb912b62b9d8485ca4803f4563 +size 15708 diff --git a/checkpoints/010000/training_state/scheduler_state.json b/checkpoints/010000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c1a6fabdcd1c2a4515b7818e55be9009a5f7ef1e --- /dev/null +++ b/checkpoints/010000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 10000, + "_step_count": 10001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 7.5625e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/010000/training_state/training_step.json b/checkpoints/010000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..7cb7c0986e9e7461ca851ce71e95d235ae3d2732 --- /dev/null +++ b/checkpoints/010000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 10000 +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/README.md b/checkpoints/012000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/012000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/adapter_config.json b/checkpoints/012000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/012000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/adapter_model.safetensors b/checkpoints/012000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..75117bec1d5c8e37e8f7110de300397b98b3e9e9 --- /dev/null +++ b/checkpoints/012000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f421f627ed2e5bcd143d69b1e97b5550d49f9b9013ea9f7ad64d5db79f852a03 +size 32895960 diff --git a/checkpoints/012000/pretrained_model/config.json b/checkpoints/012000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/012000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/policy_postprocessor.json b/checkpoints/012000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/012000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/012000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/012000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/012000/pretrained_model/policy_preprocessor.json b/checkpoints/012000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/012000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/012000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/012000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/012000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/012000/pretrained_model/train_config.json b/checkpoints/012000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/012000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/012000/training_state/optimizer_param_groups.json b/checkpoints/012000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..9116ce9bd7d37b9a4b16b233e9621f52a3ee9aa0 --- /dev/null +++ b/checkpoints/012000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 6.631457847577869e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/012000/training_state/optimizer_state.safetensors b/checkpoints/012000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8b47b34354755fb924f38651fc23e1a62b8f173 --- /dev/null +++ b/checkpoints/012000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b0ed61d1fda1555bafb4fbfb6cbf3f0f8acdc621b71011536ffaf1a9ca74c2e +size 64794928 diff --git a/checkpoints/012000/training_state/rng_state.safetensors b/checkpoints/012000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bc0b246101e2f07720027cf7e4d7845de3bcc74 --- /dev/null +++ b/checkpoints/012000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6dc268b995ea552b2f2217fd0b30f569016b9e9f233d6200e4a2b62224639e5 +size 15708 diff --git a/checkpoints/012000/training_state/scheduler_state.json b/checkpoints/012000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..eceff7e52221004e23f6f617754e8849a67eeec0 --- /dev/null +++ b/checkpoints/012000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 12000, + "_step_count": 12001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 6.631457847577869e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/012000/training_state/training_step.json b/checkpoints/012000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..c4fb27ac819b81943e6545c7c18510bdfb8eae1b --- /dev/null +++ b/checkpoints/012000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 12000 +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/README.md b/checkpoints/014000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/014000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/adapter_config.json b/checkpoints/014000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/014000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/adapter_model.safetensors b/checkpoints/014000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..80362f82663f386d4e0785da930d83698065f310 --- /dev/null +++ b/checkpoints/014000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f897632b9b179c243d6e922046d6638ffdf1a3b9011c3190a939ef3cbe895c4 +size 32895960 diff --git a/checkpoints/014000/pretrained_model/config.json b/checkpoints/014000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/014000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/policy_postprocessor.json b/checkpoints/014000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/014000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/014000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/014000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/014000/pretrained_model/policy_preprocessor.json b/checkpoints/014000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/014000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/014000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/014000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/014000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/014000/pretrained_model/train_config.json b/checkpoints/014000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/014000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/014000/training_state/optimizer_param_groups.json b/checkpoints/014000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..80d011c10d09f7a151456c882e85701f501936fa --- /dev/null +++ b/checkpoints/014000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 5.634576258429812e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/014000/training_state/optimizer_state.safetensors b/checkpoints/014000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7c723dcf9ada0ce007f546e0f23927794a8877d0 --- /dev/null +++ b/checkpoints/014000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ab69533183562d14714d77445368d574cc1ab350ac60de78fce27458f86082a +size 64794928 diff --git a/checkpoints/014000/training_state/rng_state.safetensors b/checkpoints/014000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..706d4b4f87a1e48dd6cfe0d67dd1fa1c72c2a4d0 --- /dev/null +++ b/checkpoints/014000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33407a126d3b3aad9bffacc9f14ac54a188196c105b1784f9633b00a8a6ac637 +size 15708 diff --git a/checkpoints/014000/training_state/scheduler_state.json b/checkpoints/014000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..610b30cf852264cfefc11a467be2a01d721d7d68 --- /dev/null +++ b/checkpoints/014000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 14000, + "_step_count": 14001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 5.634576258429812e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/014000/training_state/training_step.json b/checkpoints/014000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..560b82990f5c806d547ed82a5432ba60aec285e1 --- /dev/null +++ b/checkpoints/014000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 14000 +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/README.md b/checkpoints/016000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/016000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/adapter_config.json b/checkpoints/016000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/016000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/adapter_model.safetensors b/checkpoints/016000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..87994e33641679553e8d11461af9509b3b36472b --- /dev/null +++ b/checkpoints/016000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f352ee2011100b2102b55a242029523e9e951cb4d6a4cf4eac6de78f94b68653 +size 32895960 diff --git a/checkpoints/016000/pretrained_model/config.json b/checkpoints/016000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/016000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/policy_postprocessor.json b/checkpoints/016000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/016000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/016000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/016000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/016000/pretrained_model/policy_preprocessor.json b/checkpoints/016000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/016000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/016000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/016000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/016000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/016000/pretrained_model/train_config.json b/checkpoints/016000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/016000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/016000/training_state/optimizer_param_groups.json b/checkpoints/016000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..4f9ea2f6cf603ea004cb07a9489108949517a5ee --- /dev/null +++ b/checkpoints/016000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 4.6154237415701904e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/016000/training_state/optimizer_state.safetensors b/checkpoints/016000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddd215486b6209346ea55082c565800352856a97 --- /dev/null +++ b/checkpoints/016000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf8d735baa32a3418f65ae7414985ac43078162fcbb4c4415739259991fac6c +size 64794928 diff --git a/checkpoints/016000/training_state/rng_state.safetensors b/checkpoints/016000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a48b8b60bd4a30492d75c39899240a70ee99cc55 --- /dev/null +++ b/checkpoints/016000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5eac6bd3e687535e41e26ababc8e8429261206c29504014ce593b0a1f286eb6d +size 15708 diff --git a/checkpoints/016000/training_state/scheduler_state.json b/checkpoints/016000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9e4a80bbf6ff7dc87e4f7db21e884cdf5d7f2964 --- /dev/null +++ b/checkpoints/016000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 16000, + "_step_count": 16001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 4.6154237415701904e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/016000/training_state/training_step.json b/checkpoints/016000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..798020d90d4c39414dfab88708ecefd52f2e9ac5 --- /dev/null +++ b/checkpoints/016000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 16000 +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/README.md b/checkpoints/018000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/018000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/adapter_config.json b/checkpoints/018000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/018000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/adapter_model.safetensors b/checkpoints/018000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b1b0da9c8f18ebae437c8f53940135c16984d5b4 --- /dev/null +++ b/checkpoints/018000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f8f044721c87534cb1859449ab6b8ad2e6b73d6c4b1736181f53bffeebee47 +size 32895960 diff --git a/checkpoints/018000/pretrained_model/config.json b/checkpoints/018000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/018000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/policy_postprocessor.json b/checkpoints/018000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/018000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/018000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/018000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/018000/pretrained_model/policy_preprocessor.json b/checkpoints/018000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/018000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/018000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/018000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/018000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/018000/pretrained_model/train_config.json b/checkpoints/018000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/018000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/018000/training_state/optimizer_param_groups.json b/checkpoints/018000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..2af1432a7983cb6411dab391604cd3e42f0124cc --- /dev/null +++ b/checkpoints/018000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 3.618542152422132e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/018000/training_state/optimizer_state.safetensors b/checkpoints/018000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6c54dec87c80ab10bd7bc1db9f52c835c03e6e2 --- /dev/null +++ b/checkpoints/018000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2398ed767d24dcafdc32671e6a56d9aa0af05fd71f36538dad845ab9d49070a2 +size 64794928 diff --git a/checkpoints/018000/training_state/rng_state.safetensors b/checkpoints/018000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfa97f25a61961327d2576e4cb206bd929722015 --- /dev/null +++ b/checkpoints/018000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f99894c79a7b5b48f1367fc8263415bbe1526285ebdeaca0938722dfbc55b0b +size 15708 diff --git a/checkpoints/018000/training_state/scheduler_state.json b/checkpoints/018000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9be428b0008fceed41d068253231f415d4be6678 --- /dev/null +++ b/checkpoints/018000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 18000, + "_step_count": 18001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 3.618542152422132e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/018000/training_state/training_step.json b/checkpoints/018000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..332a3c17a399c305da7a515c33ffb382d0aff339 --- /dev/null +++ b/checkpoints/018000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 18000 +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/README.md b/checkpoints/020000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/020000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/adapter_config.json b/checkpoints/020000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/020000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/adapter_model.safetensors b/checkpoints/020000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fdf5c9a142d854129bc5a2540fad5bb7d8250540 --- /dev/null +++ b/checkpoints/020000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cac2a9c1ae773998915ad8ebb4c893f5ed289bbc47481d07819a7624ddf1ca19 +size 32895960 diff --git a/checkpoints/020000/pretrained_model/config.json b/checkpoints/020000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/020000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/policy_postprocessor.json b/checkpoints/020000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/020000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/020000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/020000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/020000/pretrained_model/policy_preprocessor.json b/checkpoints/020000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/020000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/020000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/020000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/020000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/020000/pretrained_model/train_config.json b/checkpoints/020000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/020000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/020000/training_state/optimizer_param_groups.json b/checkpoints/020000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..83e6d84e7b99d977d3516c9728920f64e6b8f0f6 --- /dev/null +++ b/checkpoints/020000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 2.6875000000000013e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/020000/training_state/optimizer_state.safetensors b/checkpoints/020000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ab0dfb7e3f8a7ccd7592d21ddddff64f1a6669b1 --- /dev/null +++ b/checkpoints/020000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc9127e5645ab279fa0c5d39f8b0bfda5d334151eff44981341560a3bba699ce +size 64794928 diff --git a/checkpoints/020000/training_state/rng_state.safetensors b/checkpoints/020000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c17f03128277e1f1e145494a70279960004e7a06 --- /dev/null +++ b/checkpoints/020000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860bc8899431d7c4ab696d77b02e3bb08754a037c508bde2be82ef42d76051ab +size 15708 diff --git a/checkpoints/020000/training_state/scheduler_state.json b/checkpoints/020000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..bc8702f14e2b5c016ee125c7113babe553dd2e53 --- /dev/null +++ b/checkpoints/020000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 20000, + "_step_count": 20001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 2.6875000000000013e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/020000/training_state/training_step.json b/checkpoints/020000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..dc9bb47026c5d5237ca6fc5dbff6020dd122ea05 --- /dev/null +++ b/checkpoints/020000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 20000 +} \ No newline at end of file diff --git a/checkpoints/022000/pretrained_model/README.md b/checkpoints/022000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/022000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/022000/pretrained_model/adapter_config.json b/checkpoints/022000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/022000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/022000/pretrained_model/adapter_model.safetensors b/checkpoints/022000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1123c25576eeba2015f56b0a8b8e097bdbd50b30 --- /dev/null +++ b/checkpoints/022000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52c59f5a80796ae17bc98b8ab0dddaf404f0d4dc414a137ddab86fde85218cfb +size 32895960 diff --git a/checkpoints/022000/pretrained_model/config.json b/checkpoints/022000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/022000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/022000/pretrained_model/policy_postprocessor.json b/checkpoints/022000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/022000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/022000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/022000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/022000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/022000/pretrained_model/policy_preprocessor.json b/checkpoints/022000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/022000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/022000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/022000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/022000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/022000/pretrained_model/train_config.json b/checkpoints/022000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/022000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/022000/training_state/optimizer_param_groups.json b/checkpoints/022000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..e69b920d643af93884756041e686ecbd40366437 --- /dev/null +++ b/checkpoints/022000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 1.862988294000568e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/022000/training_state/optimizer_state.safetensors b/checkpoints/022000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d277e9c9b13e17082e864579721ce1fc19a4da5 --- /dev/null +++ b/checkpoints/022000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7b6624d385c0668d6da92e7f776a2165b1881ef3d0c9ff8e7689aee78e8e635 +size 64794928 diff --git a/checkpoints/022000/training_state/rng_state.safetensors b/checkpoints/022000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b402d4e3ed0dc6d40f097682a4774dfefbdff67 --- /dev/null +++ b/checkpoints/022000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e670e294e4b7afeb2c0cd8897ad71880e59c08b1ff305cd9fd8b1235029f7fbe +size 15708 diff --git a/checkpoints/022000/training_state/scheduler_state.json b/checkpoints/022000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..684508ec5c862e93884e4138479e9a6f5aa1d0d1 --- /dev/null +++ b/checkpoints/022000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 22000, + "_step_count": 22001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.862988294000568e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/022000/training_state/training_step.json b/checkpoints/022000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..d60e27dffb5d802ca16598a852aec850971cbb39 --- /dev/null +++ b/checkpoints/022000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 22000 +} \ No newline at end of file diff --git a/checkpoints/024000/pretrained_model/README.md b/checkpoints/024000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/024000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/024000/pretrained_model/adapter_config.json b/checkpoints/024000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/024000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/024000/pretrained_model/adapter_model.safetensors b/checkpoints/024000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d2f4f7fba7644fecb1515e0d58128e6e7c6bac0a --- /dev/null +++ b/checkpoints/024000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a2dc3615faf342cdc762b3d670d7b09aea0a2d262d9d1486cbdb7fb5c6b2fc16 +size 32895960 diff --git a/checkpoints/024000/pretrained_model/config.json b/checkpoints/024000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/024000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/024000/pretrained_model/policy_postprocessor.json b/checkpoints/024000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/024000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/024000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/024000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/024000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/024000/pretrained_model/policy_preprocessor.json b/checkpoints/024000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/024000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/024000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/024000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/024000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/024000/pretrained_model/train_config.json b/checkpoints/024000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/024000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/024000/training_state/optimizer_param_groups.json b/checkpoints/024000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..c13c8d80816ec604f6a94e37757507a1109dcdec --- /dev/null +++ b/checkpoints/024000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 1.1810421524221319e-05, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/024000/training_state/optimizer_state.safetensors b/checkpoints/024000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28a96ae05b37b0a36bccea59df9a4992285dc460 --- /dev/null +++ b/checkpoints/024000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:860791e3a0b488dcb8ece1a7f6e0733337894e342f426f2b3f8bfeb3c18f1e76 +size 64794928 diff --git a/checkpoints/024000/training_state/rng_state.safetensors b/checkpoints/024000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac528353db101eb8caffce4c1084ed7f1a0c8b37 --- /dev/null +++ b/checkpoints/024000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:615998c06441caf06995a105478429ecf08b34d7286c1dce500f7e17156e3c29 +size 15708 diff --git a/checkpoints/024000/training_state/scheduler_state.json b/checkpoints/024000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d75dae83466b1f70a548b345410cbc1cf2f88e26 --- /dev/null +++ b/checkpoints/024000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 24000, + "_step_count": 24001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 1.1810421524221319e-05 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/024000/training_state/training_step.json b/checkpoints/024000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..4bf22a00d4c8b45342d351916c6e2f27ab1dd7d9 --- /dev/null +++ b/checkpoints/024000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 24000 +} \ No newline at end of file diff --git a/checkpoints/026000/pretrained_model/README.md b/checkpoints/026000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/026000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/026000/pretrained_model/adapter_config.json b/checkpoints/026000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/026000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/026000/pretrained_model/adapter_model.safetensors b/checkpoints/026000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9a7ca7ffc24e484b3ff9b60adc80ecc5a70097a8 --- /dev/null +++ b/checkpoints/026000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b0d96b473192f3cda3cde16a98398295d92313c4bc6740637c4cc4bae05b658 +size 32895960 diff --git a/checkpoints/026000/pretrained_model/config.json b/checkpoints/026000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/026000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/026000/pretrained_model/policy_postprocessor.json b/checkpoints/026000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/026000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/026000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/026000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/026000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/026000/pretrained_model/policy_preprocessor.json b/checkpoints/026000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/026000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/026000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/026000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/026000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/026000/pretrained_model/train_config.json b/checkpoints/026000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/026000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/026000/training_state/optimizer_param_groups.json b/checkpoints/026000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..e4e43a9c460c5b091c14ec90bfeae9857f0382dd --- /dev/null +++ b/checkpoints/026000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 6.714658939923202e-06, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/026000/training_state/optimizer_state.safetensors b/checkpoints/026000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f23b42b0112ff8f1341a389f6da6d06814d927d --- /dev/null +++ b/checkpoints/026000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:125b6ab7765dd4879267fe1c849af8bd2cf5a40b70ee04431267b2707fe41249 +size 64794928 diff --git a/checkpoints/026000/training_state/rng_state.safetensors b/checkpoints/026000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bda3f79b7d5a074cb2ff854dc82d6a5fc68edd1 --- /dev/null +++ b/checkpoints/026000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d34b0781cdb978ba335fd50a241c5629a2e94f128dc12f0a495de1de7cd16f19 +size 15708 diff --git a/checkpoints/026000/training_state/scheduler_state.json b/checkpoints/026000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..62ac218a5deff43dfc43a2afdcccf98387be53b8 --- /dev/null +++ b/checkpoints/026000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 26000, + "_step_count": 26001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 6.714658939923202e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/026000/training_state/training_step.json b/checkpoints/026000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..60495b67a6a5c8ae0694d967a703afdf4bdc4066 --- /dev/null +++ b/checkpoints/026000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 26000 +} \ No newline at end of file diff --git a/checkpoints/028000/pretrained_model/README.md b/checkpoints/028000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/028000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/028000/pretrained_model/adapter_config.json b/checkpoints/028000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/028000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/028000/pretrained_model/adapter_model.safetensors b/checkpoints/028000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b078545fb50c8623c64b578114661e6f911c0c6b --- /dev/null +++ b/checkpoints/028000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:590175b1039707366f9e2a8575359eec54babe20d422612f1d1c4d064fcac88b +size 32895960 diff --git a/checkpoints/028000/pretrained_model/config.json b/checkpoints/028000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/028000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/028000/pretrained_model/policy_postprocessor.json b/checkpoints/028000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/028000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/028000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/028000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/028000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/028000/pretrained_model/policy_preprocessor.json b/checkpoints/028000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/028000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/028000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/028000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/028000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/028000/pretrained_model/train_config.json b/checkpoints/028000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/028000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/028000/training_state/optimizer_param_groups.json b/checkpoints/028000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..f365f2b5c2912dce3fdf81fc646e9a55384425e8 --- /dev/null +++ b/checkpoints/028000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 3.5653044642269782e-06, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/028000/training_state/optimizer_state.safetensors b/checkpoints/028000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f31597ac5a8b3fdc2cc0f85127a441013e8e724 --- /dev/null +++ b/checkpoints/028000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38286dc4f4a8fd59af3a4f8aaf9e51fb0a03c9648b138b00d7e601bf0f0fe6a2 +size 64794928 diff --git a/checkpoints/028000/training_state/rng_state.safetensors b/checkpoints/028000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..972df86b449969d38ae84b078b317f5a2907caa0 --- /dev/null +++ b/checkpoints/028000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:200a0cc57f7927c50efbdd9a81bbda70e2411b1127a5e92a41db2370ae31c296 +size 15708 diff --git a/checkpoints/028000/training_state/scheduler_state.json b/checkpoints/028000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3b707a4867fad10211178dbc358ffc6ab39477c4 --- /dev/null +++ b/checkpoints/028000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 28000, + "_step_count": 28001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 3.5653044642269782e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/028000/training_state/training_step.json b/checkpoints/028000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..86065a62b528a2c5fbe5615c4f34ea67322ce13e --- /dev/null +++ b/checkpoints/028000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 28000 +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/README.md b/checkpoints/030000/pretrained_model/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f8c735acadca8585006ca2be9cc80efb68fa31af --- /dev/null +++ b/checkpoints/030000/pretrained_model/README.md @@ -0,0 +1,204 @@ +--- +library_name: peft +tags: +- base_model:adapter:outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model +- lora +--- + +# Model Card for Model ID + + + + + +## Model Details + +### Model Description + + + + + +- **Developed by:** [More Information Needed] +- **Funded by [optional]:** [More Information Needed] +- **Shared by [optional]:** [More Information Needed] +- **Model type:** [More Information Needed] +- **Language(s) (NLP):** [More Information Needed] +- **License:** [More Information Needed] +- **Finetuned from model [optional]:** [More Information Needed] + +### Model Sources [optional] + + + +- **Repository:** [More Information Needed] +- **Paper [optional]:** [More Information Needed] +- **Demo [optional]:** [More Information Needed] + +## Uses + + + +### Direct Use + + + +[More Information Needed] + +### Downstream Use [optional] + + + +[More Information Needed] + +### Out-of-Scope Use + + + +[More Information Needed] + +## Bias, Risks, and Limitations + + + +[More Information Needed] + +### Recommendations + + + +Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations. + +## How to Get Started with the Model + +Use the code below to get started with the model. + +[More Information Needed] + +## Training Details + +### Training Data + + + +[More Information Needed] + +### Training Procedure + + + +#### Preprocessing [optional] + +[More Information Needed] + + +#### Training Hyperparameters + +- **Training regime:** [More Information Needed] + +#### Speeds, Sizes, Times [optional] + + + +[More Information Needed] + +## Evaluation + + + +### Testing Data, Factors & Metrics + +#### Testing Data + + + +[More Information Needed] + +#### Factors + + + +[More Information Needed] + +#### Metrics + + + +[More Information Needed] + +### Results + +[More Information Needed] + +#### Summary + + + +## Model Examination [optional] + + + +[More Information Needed] + +## Environmental Impact + + + +Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700). + +- **Hardware Type:** [More Information Needed] +- **Hours used:** [More Information Needed] +- **Cloud Provider:** [More Information Needed] +- **Compute Region:** [More Information Needed] +- **Carbon Emitted:** [More Information Needed] + +## Technical Specifications [optional] + +### Model Architecture and Objective + +[More Information Needed] + +### Compute Infrastructure + +[More Information Needed] + +#### Hardware + +[More Information Needed] + +#### Software + +[More Information Needed] + +## Citation [optional] + + + +**BibTeX:** + +[More Information Needed] + +**APA:** + +[More Information Needed] + +## Glossary [optional] + + + +[More Information Needed] + +## More Information [optional] + +[More Information Needed] + +## Model Card Authors [optional] + +[More Information Needed] + +## Model Card Contact + +[More Information Needed] +### Framework versions + +- PEFT 0.19.1 \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/adapter_config.json b/checkpoints/030000/pretrained_model/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..066e017415d3ccfa60c7c2a94ca49d8ed0e1cf0e --- /dev/null +++ b/checkpoints/030000/pretrained_model/adapter_config.json @@ -0,0 +1,46 @@ +{ + "alora_invocation_tokens": null, + "alpha_pattern": {}, + "arrow_config": null, + "auto_mapping": { + "base_model_class": "SmolVLAPolicy", + "parent_library": "lerobot.policies.smolvla.modeling_smolvla" + }, + "base_model_name_or_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "bias": "none", + "corda_config": null, + "ensure_weight_tying": false, + "eva_config": null, + "exclude_modules": null, + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layer_replication": null, + "layers_pattern": null, + "layers_to_transform": null, + "loftq_config": {}, + "lora_alpha": 8, + "lora_bias": false, + "lora_dropout": 0.0, + "lora_ga_config": null, + "megatron_config": null, + "megatron_core": "megatron.core", + "modules_to_save": [], + "peft_type": "LORA", + "peft_version": "0.19.1", + "qalora_group_size": 16, + "r": 64, + "rank_pattern": {}, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "target_parameters": null, + "task_type": null, + "trainable_token_indices": null, + "use_bdlora": null, + "use_dora": false, + "use_qalora": false, + "use_rslora": false +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/adapter_model.safetensors b/checkpoints/030000/pretrained_model/adapter_model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2d9e3551438d91277d77252e7ee377afc3cfe109 --- /dev/null +++ b/checkpoints/030000/pretrained_model/adapter_model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcf16625ff939707156f132fa28410f2130798d67fe1d0d4bb85b1f41583d477 +size 32895960 diff --git a/checkpoints/030000/pretrained_model/config.json b/checkpoints/030000/pretrained_model/config.json new file mode 100644 index 0000000000000000000000000000000000000000..96a77fd041a16fbd8278850aaa2f409642c6f23f --- /dev/null +++ b/checkpoints/030000/pretrained_model/config.json @@ -0,0 +1,101 @@ +{ + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/policy_postprocessor.json b/checkpoints/030000/pretrained_model/policy_postprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..6f8997e5be18c67bad9377dd2cd9622ba38b5ae3 --- /dev/null +++ b/checkpoints/030000/pretrained_model/policy_postprocessor.json @@ -0,0 +1,32 @@ +{ + "name": "policy_postprocessor", + "steps": [ + { + "registry_name": "unnormalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_postprocessor_step_0_unnormalizer_processor.safetensors" + }, + { + "registry_name": "device_processor", + "config": { + "device": "cpu", + "float_dtype": null + } + } + ] +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors b/checkpoints/030000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/030000/pretrained_model/policy_postprocessor_step_0_unnormalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/030000/pretrained_model/policy_preprocessor.json b/checkpoints/030000/pretrained_model/policy_preprocessor.json new file mode 100644 index 0000000000000000000000000000000000000000..e53b6e96d9a0df7a9ab33d165c02ef895345e2bb --- /dev/null +++ b/checkpoints/030000/pretrained_model/policy_preprocessor.json @@ -0,0 +1,89 @@ +{ + "name": "policy_preprocessor", + "steps": [ + { + "registry_name": "rename_observations_processor", + "config": { + "rename_map": { + "observation.images.front": "observation.images.camera1" + } + } + }, + { + "registry_name": "to_batch_processor", + "config": {} + }, + { + "registry_name": "smolvla_new_line_processor", + "config": {} + }, + { + "registry_name": "tokenizer_processor", + "config": { + "max_length": 48, + "task_key": "task", + "padding_side": "right", + "padding": "max_length", + "truncation": true, + "tokenizer_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct" + } + }, + { + "registry_name": "device_processor", + "config": { + "device": "cuda", + "float_dtype": null + } + }, + { + "registry_name": "normalizer_processor", + "config": { + "eps": 1e-08, + "features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "norm_map": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + } + }, + "state_file": "policy_preprocessor_step_5_normalizer_processor.safetensors" + } + ] +} \ No newline at end of file diff --git a/checkpoints/030000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors b/checkpoints/030000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..26c9f9e34b7491c69b1bb99649fd7d0d98082b3b --- /dev/null +++ b/checkpoints/030000/pretrained_model/policy_preprocessor_step_5_normalizer_processor.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95b1027d071458ea98ce5cad20f02497ee126ffb777eaba0854f405c81181c94 +size 6560 diff --git a/checkpoints/030000/pretrained_model/train_config.json b/checkpoints/030000/pretrained_model/train_config.json new file mode 100644 index 0000000000000000000000000000000000000000..8ff29369c5ad1698d281a428bb61f772ddeeb4b5 --- /dev/null +++ b/checkpoints/030000/pretrained_model/train_config.json @@ -0,0 +1,250 @@ +{ + "dataset": { + "repo_id": "robot-learning-group47/eval3_toy_video", + "root": "./data/eval3_toy_video", + "episodes": null, + "image_transforms": { + "enable": false, + "max_num_transforms": 3, + "random_order": false, + "tfs": { + "brightness": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "brightness": [ + 0.8, + 1.2 + ] + } + }, + "contrast": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "contrast": [ + 0.8, + 1.2 + ] + } + }, + "saturation": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "saturation": [ + 0.5, + 1.5 + ] + } + }, + "hue": { + "weight": 1.0, + "type": "ColorJitter", + "kwargs": { + "hue": [ + -0.05, + 0.05 + ] + } + }, + "sharpness": { + "weight": 1.0, + "type": "SharpnessJitter", + "kwargs": { + "sharpness": [ + 0.5, + 1.5 + ] + } + }, + "affine": { + "weight": 1.0, + "type": "RandomAffine", + "kwargs": { + "degrees": [ + -5.0, + 5.0 + ], + "translate": [ + 0.05, + 0.05 + ] + } + } + } + }, + "revision": null, + "use_imagenet_stats": true, + "video_backend": "torchcodec", + "streaming": false + }, + "env": null, + "policy": { + "type": "smolvla", + "n_obs_steps": 1, + "input_features": { + "observation.state": { + "type": "STATE", + "shape": [ + 6 + ] + }, + "observation.images.camera1": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera2": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + }, + "observation.images.camera3": { + "type": "VISUAL", + "shape": [ + 3, + 256, + 256 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 6 + ] + } + }, + "device": "cuda", + "use_amp": false, + "use_peft": true, + "push_to_hub": true, + "repo_id": "robot_learning_group47/eval3_test", + "private": null, + "tags": null, + "license": null, + "pretrained_path": "outputs/train/2026-05-19/16-29-49_smolvla/checkpoints/last/pretrained_model", + "chunk_size": 50, + "n_action_steps": 50, + "normalization_mapping": { + "VISUAL": "IDENTITY", + "STATE": "MEAN_STD", + "ACTION": "MEAN_STD" + }, + "max_state_dim": 32, + "max_action_dim": 32, + "resize_imgs_with_padding": [ + 512, + 512 + ], + "empty_cameras": 0, + "adapt_to_pi_aloha": false, + "use_delta_joint_actions_aloha": false, + "tokenizer_max_length": 48, + "num_steps": 10, + "use_cache": true, + "freeze_vision_encoder": true, + "train_expert_only": true, + "train_state_proj": true, + "optimizer_lr": 0.0001, + "optimizer_betas": [ + 0.9, + 0.95 + ], + "optimizer_eps": 1e-08, + "optimizer_weight_decay": 1e-10, + "optimizer_grad_clip_norm": 10.0, + "scheduler_warmup_steps": 1000, + "scheduler_decay_steps": 30000, + "scheduler_decay_lr": 2.5e-06, + "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct", + "load_vlm_weights": true, + "add_image_special_tokens": false, + "attention_mode": "cross_attn", + "prefix_length": 0, + "pad_language_to": "max_length", + "num_expert_layers": 0, + "num_vlm_layers": 16, + "self_attn_every_n_layers": 2, + "expert_width_multiplier": 0.75, + "min_period": 0.004, + "max_period": 4.0, + "rtc_config": null, + "compile_model": false, + "compile_mode": "max-autotune" + }, + "output_dir": "outputs/train/2026-05-19/17-12-35_smolvla", + "job_name": "smolvla", + "resume": false, + "seed": 1000, + "cudnn_deterministic": false, + "num_workers": 4, + "batch_size": 32, + "steps": 30000, + "eval_freq": 20000, + "log_freq": 200, + "tolerance_s": 0.0001, + "save_checkpoint": true, + "save_freq": 2000, + "use_policy_training_preset": true, + "optimizer": { + "type": "adamw", + "lr": 0.0001, + "weight_decay": 1e-10, + "grad_clip_norm": 10.0, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08 + }, + "scheduler": { + "type": "cosine_decay_with_warmup", + "num_warmup_steps": 1000, + "num_decay_steps": 30000, + "peak_lr": 0.0001, + "decay_lr": 2.5e-06 + }, + "eval": { + "n_episodes": 50, + "batch_size": 50, + "use_async_envs": false + }, + "wandb": { + "enable": false, + "disable_artifact": false, + "project": "lerobot", + "entity": null, + "notes": null, + "run_id": null, + "mode": null, + "add_tags": true + }, + "peft": { + "target_modules": [ + "q_proj", + "v_proj" + ], + "full_training_modules": null, + "method_type": "LORA", + "init_type": null, + "r": 64 + }, + "use_rabc": false, + "rabc_progress_path": null, + "rabc_kappa": 0.01, + "rabc_epsilon": 1e-06, + "rabc_head_mode": "sparse", + "rename_map": { + "observation.images.front": "observation.images.camera1" + }, + "checkpoint_path": null +} \ No newline at end of file diff --git a/checkpoints/030000/training_state/optimizer_param_groups.json b/checkpoints/030000/training_state/optimizer_param_groups.json new file mode 100644 index 0000000000000000000000000000000000000000..a7f23762b148bf63ff8a6896d20ab545a9d6ad8c --- /dev/null +++ b/checkpoints/030000/training_state/optimizer_param_groups.json @@ -0,0 +1,697 @@ +[ + { + "lr": 2.5e-06, + "betas": [ + 0.9, + 0.95 + ], + "eps": 1e-08, + "weight_decay": 1e-10, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0001, + "params": [ + 0, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, + 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675 + ] + } +] \ No newline at end of file diff --git a/checkpoints/030000/training_state/optimizer_state.safetensors b/checkpoints/030000/training_state/optimizer_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d3ecce85d99009fe5f33871c9b3ec1d8a21562dc --- /dev/null +++ b/checkpoints/030000/training_state/optimizer_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ac80c4b4ab1cf35befd4f58a4e6188f78d6f6de603eeb17220529b5bb826716 +size 64794928 diff --git a/checkpoints/030000/training_state/rng_state.safetensors b/checkpoints/030000/training_state/rng_state.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2b0e40d0de7c2a04dcf8d44f11d58604fb79dea --- /dev/null +++ b/checkpoints/030000/training_state/rng_state.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b7e818a22eab4c694f778fec3539360b92613c027337880f92119515ab527b1 +size 15708 diff --git a/checkpoints/030000/training_state/scheduler_state.json b/checkpoints/030000/training_state/scheduler_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a09e9ada2b0bc0b2f519fcd89834a9e8725ec165 --- /dev/null +++ b/checkpoints/030000/training_state/scheduler_state.json @@ -0,0 +1,15 @@ +{ + "base_lrs": [ + 0.0001 + ], + "last_epoch": 30000, + "_step_count": 30001, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 2.5e-06 + ], + "lr_lambdas": [ + null + ] +} \ No newline at end of file diff --git a/checkpoints/030000/training_state/training_step.json b/checkpoints/030000/training_state/training_step.json new file mode 100644 index 0000000000000000000000000000000000000000..f4945f660f45b332883dccfccf18d8b8815d916a --- /dev/null +++ b/checkpoints/030000/training_state/training_step.json @@ -0,0 +1,3 @@ +{ + "step": 30000 +} \ No newline at end of file