Menoking committed on
Commit
b5c3b51
·
verified ·
1 Parent(s): 9d6c43e

Upload policy weights, train config and readme

Browse files
Files changed (4) hide show
  1. README.md +4 -4
  2. config.json +47 -23
  3. model.safetensors +2 -2
  4. train_config.json +59 -31
README.md CHANGED
@@ -2,20 +2,20 @@
2
  datasets: lerobot/pusht
3
  library_name: lerobot
4
  license: apache-2.0
5
- model_name: act
6
  pipeline_tag: robotics
7
  tags:
 
8
  - lerobot
9
  - robotics
10
- - act
11
  ---
12
 
13
- # Model Card for act
14
 
15
  <!-- Provide a quick summary of what the model is/does. -->
16
 
17
 
18
- [Action Chunking with Transformers (ACT)](https://huggingface.co/papers/2304.13705) is an imitation-learning method that predicts short action chunks instead of single steps. It learns from teleoperated data and often achieves high success rates.
19
 
20
 
21
  This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
 
2
  datasets: lerobot/pusht
3
  library_name: lerobot
4
  license: apache-2.0
5
+ model_name: diffusion
6
  pipeline_tag: robotics
7
  tags:
8
+ - diffusion
9
  - lerobot
10
  - robotics
 
11
  ---
12
 
13
+ # Model Card for diffusion
14
 
15
  <!-- Provide a quick summary of what the model is/does. -->
16
 
17
 
18
+ [Diffusion Policy](https://huggingface.co/papers/2303.04137) treats visuomotor control as a generative diffusion process, producing smooth, multi-step action trajectories that excel at contact-rich manipulation.
19
 
20
 
21
  This policy has been trained and pushed to the Hub using [LeRobot](https://github.com/huggingface/lerobot).
config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
- "type": "act",
3
- "n_obs_steps": 1,
4
  "input_features": {
5
  "observation.image": {
6
  "type": "VISUAL",
@@ -34,30 +34,54 @@
34
  "tags": null,
35
  "license": null,
36
  "pretrained_path": null,
37
- "chunk_size": 20,
38
- "n_action_steps": 10,
39
  "normalization_mapping": {
40
  "VISUAL": "MEAN_STD",
41
- "STATE": "MEAN_STD",
42
- "ACTION": "MEAN_STD"
43
  },
 
44
  "vision_backbone": "resnet34",
 
 
 
 
 
 
 
45
  "pretrained_backbone_weights": "ResNet34_Weights.IMAGENET1K_V1",
46
- "replace_final_stride_with_dilation": 0,
47
- "pre_norm": false,
48
- "dim_model": 512,
49
- "n_heads": 8,
50
- "dim_feedforward": 3200,
51
- "feedforward_activation": "relu",
52
- "n_encoder_layers": 4,
53
- "n_decoder_layers": 1,
54
- "use_vae": true,
55
- "latent_dim": 32,
56
- "n_vae_encoder_layers": 4,
57
- "temporal_ensemble_coeff": null,
58
- "dropout": 0.1,
59
- "kl_weight": 10.0,
60
- "optimizer_lr": 2e-05,
61
- "optimizer_weight_decay": 0.0001,
62
- "optimizer_lr_backbone": 2e-05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  }
 
1
  {
2
+ "type": "diffusion",
3
+ "n_obs_steps": 2,
4
  "input_features": {
5
  "observation.image": {
6
  "type": "VISUAL",
 
34
  "tags": null,
35
  "license": null,
36
  "pretrained_path": null,
37
+ "horizon": 32,
38
+ "n_action_steps": 8,
39
  "normalization_mapping": {
40
  "VISUAL": "MEAN_STD",
41
+ "STATE": "MIN_MAX",
42
+ "ACTION": "MIN_MAX"
43
  },
44
+ "drop_n_last_frames": 23,
45
  "vision_backbone": "resnet34",
46
+ "resize_shape": null,
47
+ "crop_ratio": 1.0,
48
+ "crop_shape": [
49
+ 84,
50
+ 84
51
+ ],
52
+ "crop_is_random": true,
53
  "pretrained_backbone_weights": "ResNet34_Weights.IMAGENET1K_V1",
54
+ "use_group_norm": false,
55
+ "spatial_softmax_num_keypoints": 32,
56
+ "use_separate_rgb_encoder_per_camera": false,
57
+ "down_dims": [
58
+ 512,
59
+ 1024,
60
+ 2048
61
+ ],
62
+ "kernel_size": 5,
63
+ "n_groups": 8,
64
+ "diffusion_step_embed_dim": 128,
65
+ "use_film_scale_modulation": true,
66
+ "noise_scheduler_type": "DDIM",
67
+ "num_train_timesteps": 100,
68
+ "beta_schedule": "squaredcos_cap_v2",
69
+ "beta_start": 0.0001,
70
+ "beta_end": 0.02,
71
+ "prediction_type": "epsilon",
72
+ "clip_sample": true,
73
+ "clip_sample_range": 1.0,
74
+ "num_inference_steps": 10,
75
+ "compile_model": false,
76
+ "compile_mode": "reduce-overhead",
77
+ "do_mask_loss_for_padding": false,
78
+ "optimizer_lr": 0.0001,
79
+ "optimizer_betas": [
80
+ 0.95,
81
+ 0.999
82
+ ],
83
+ "optimizer_eps": 1e-08,
84
+ "optimizer_weight_decay": 1e-06,
85
+ "scheduler_name": "cosine",
86
+ "scheduler_warmup_steps": 500
87
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88dd53e07a1bfbf8c9f089ff7de3d798168656d397ad3ac29ecf1b16a52f819e
3
- size 246810040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17fb3860e84d4ce6c5e2a06509f143fd43fcb1c4ebc00d3f66af3bd8ee5fab29
3
+ size 1091380504
train_config.json CHANGED
@@ -81,8 +81,8 @@
81
  },
82
  "env": null,
83
  "policy": {
84
- "type": "act",
85
- "n_obs_steps": 1,
86
  "input_features": {
87
  "observation.image": {
88
  "type": "VISUAL",
@@ -116,35 +116,59 @@
116
  "tags": null,
117
  "license": null,
118
  "pretrained_path": null,
119
- "chunk_size": 20,
120
- "n_action_steps": 10,
121
  "normalization_mapping": {
122
  "VISUAL": "MEAN_STD",
123
- "STATE": "MEAN_STD",
124
- "ACTION": "MEAN_STD"
125
  },
 
126
  "vision_backbone": "resnet34",
 
 
 
 
 
 
 
127
  "pretrained_backbone_weights": "ResNet34_Weights.IMAGENET1K_V1",
128
- "replace_final_stride_with_dilation": 0,
129
- "pre_norm": false,
130
- "dim_model": 512,
131
- "n_heads": 8,
132
- "dim_feedforward": 3200,
133
- "feedforward_activation": "relu",
134
- "n_encoder_layers": 4,
135
- "n_decoder_layers": 1,
136
- "use_vae": true,
137
- "latent_dim": 32,
138
- "n_vae_encoder_layers": 4,
139
- "temporal_ensemble_coeff": null,
140
- "dropout": 0.1,
141
- "kl_weight": 10.0,
142
- "optimizer_lr": 2e-05,
143
- "optimizer_weight_decay": 0.0001,
144
- "optimizer_lr_backbone": 2e-05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  },
146
- "output_dir": "/workspace/outputs/act-pusht-run11-50K",
147
- "job_name": "act_pusht_run11-50K",
148
  "resume": false,
149
  "seed": 1000,
150
  "cudnn_deterministic": false,
@@ -158,17 +182,21 @@
158
  "save_freq": 25000,
159
  "use_policy_training_preset": true,
160
  "optimizer": {
161
- "type": "adamw",
162
- "lr": 2e-05,
163
- "weight_decay": 0.0001,
164
  "grad_clip_norm": 10.0,
165
  "betas": [
166
- 0.9,
167
  0.999
168
  ],
169
  "eps": 1e-08
170
  },
171
- "scheduler": null,
 
 
 
 
172
  "eval": {
173
  "n_episodes": 20,
174
  "batch_size": 8,
@@ -180,7 +208,7 @@
180
  "project": "pusht-t4",
181
  "entity": null,
182
  "notes": null,
183
- "run_id": "83vimi6x",
184
  "mode": null,
185
  "add_tags": true
186
  },
 
81
  },
82
  "env": null,
83
  "policy": {
84
+ "type": "diffusion",
85
+ "n_obs_steps": 2,
86
  "input_features": {
87
  "observation.image": {
88
  "type": "VISUAL",
 
116
  "tags": null,
117
  "license": null,
118
  "pretrained_path": null,
119
+ "horizon": 32,
120
+ "n_action_steps": 8,
121
  "normalization_mapping": {
122
  "VISUAL": "MEAN_STD",
123
+ "STATE": "MIN_MAX",
124
+ "ACTION": "MIN_MAX"
125
  },
126
+ "drop_n_last_frames": 23,
127
  "vision_backbone": "resnet34",
128
+ "resize_shape": null,
129
+ "crop_ratio": 1.0,
130
+ "crop_shape": [
131
+ 84,
132
+ 84
133
+ ],
134
+ "crop_is_random": true,
135
  "pretrained_backbone_weights": "ResNet34_Weights.IMAGENET1K_V1",
136
+ "use_group_norm": false,
137
+ "spatial_softmax_num_keypoints": 32,
138
+ "use_separate_rgb_encoder_per_camera": false,
139
+ "down_dims": [
140
+ 512,
141
+ 1024,
142
+ 2048
143
+ ],
144
+ "kernel_size": 5,
145
+ "n_groups": 8,
146
+ "diffusion_step_embed_dim": 128,
147
+ "use_film_scale_modulation": true,
148
+ "noise_scheduler_type": "DDIM",
149
+ "num_train_timesteps": 100,
150
+ "beta_schedule": "squaredcos_cap_v2",
151
+ "beta_start": 0.0001,
152
+ "beta_end": 0.02,
153
+ "prediction_type": "epsilon",
154
+ "clip_sample": true,
155
+ "clip_sample_range": 1.0,
156
+ "num_inference_steps": 10,
157
+ "compile_model": false,
158
+ "compile_mode": "reduce-overhead",
159
+ "do_mask_loss_for_padding": false,
160
+ "optimizer_lr": 0.0001,
161
+ "optimizer_betas": [
162
+ 0.95,
163
+ 0.999
164
+ ],
165
+ "optimizer_eps": 1e-08,
166
+ "optimizer_weight_decay": 1e-06,
167
+ "scheduler_name": "cosine",
168
+ "scheduler_warmup_steps": 500
169
  },
170
+ "output_dir": "/workspace/outputs/diffusion-pusht-run2-50K",
171
+ "job_name": "diffusion_pusht_run2-50K",
172
  "resume": false,
173
  "seed": 1000,
174
  "cudnn_deterministic": false,
 
182
  "save_freq": 25000,
183
  "use_policy_training_preset": true,
184
  "optimizer": {
185
+ "type": "adam",
186
+ "lr": 0.0001,
187
+ "weight_decay": 1e-06,
188
  "grad_clip_norm": 10.0,
189
  "betas": [
190
+ 0.95,
191
  0.999
192
  ],
193
  "eps": 1e-08
194
  },
195
+ "scheduler": {
196
+ "type": "diffuser",
197
+ "num_warmup_steps": 500,
198
+ "name": "cosine"
199
+ },
200
  "eval": {
201
  "n_episodes": 20,
202
  "batch_size": 8,
 
208
  "project": "pusht-t4",
209
  "entity": null,
210
  "notes": null,
211
+ "run_id": "uuoy2iqs",
212
  "mode": null,
213
  "add_tags": true
214
  },