Upload folder using huggingface_hub
Browse files
checkpoints/steps_50000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2f8b9838d211d7645eca9aa47d83b7420fe26fca65142f64b3634fa8401f67c
|
| 3 |
+
size 9982353306
|
checkpoints/steps_60000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d24dae91b10ee9214b913029c0c42f5625dd5be8119416dc5d1d6ad5d4513296
|
| 3 |
+
size 9982353306
|
checkpoints/steps_70000_pytorch_model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c9f3630b6ee39cc14ee8a9266f6e9900faa42381e99f9da4318bbac73fd6df9
|
| 3 |
+
size 9982353306
|
config.yaml
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
datasets:
|
| 2 |
+
vla_data:
|
| 3 |
+
data_mix: bridge
|
| 4 |
+
data_root_dir: /mnt/project_ai4edu/lyl/worlkplace/starVLA/playground/Datasets/OXE_LEROBOT_DATASET
|
| 5 |
+
image_size:
|
| 6 |
+
- 224
|
| 7 |
+
- 224
|
| 8 |
+
per_device_batch_size: 16
|
| 9 |
+
video_backend: pyav
|
| 10 |
+
frameskip:
|
| 11 |
+
cache_dir: playground/frameskip/frameskip_cache_vac_cr10-100
|
| 12 |
+
default_compression_ratio: 1.0
|
| 13 |
+
enabled: true
|
| 14 |
+
importance:
|
| 15 |
+
allow_backend_fallback: false
|
| 16 |
+
alpha: 0.6
|
| 17 |
+
beta: 0.2
|
| 18 |
+
device: cuda
|
| 19 |
+
enable_vac: true
|
| 20 |
+
gamma: 0.2
|
| 21 |
+
max_vac_frames: 16
|
| 22 |
+
type: gripper_aware
|
| 23 |
+
vac_beta: 0.2
|
| 24 |
+
video_backend: ffmpeg
|
| 25 |
+
visual_encoder_checkpoint: /mnt/project_ai4edu/share/models/timm/vit_large_patch14_dinov2.lvd142m/pytorch_model.bin
|
| 26 |
+
visual_encoder_name: vit_large_patch14_dinov2
|
| 27 |
+
pruning:
|
| 28 |
+
compression_ratios:
|
| 29 |
+
- 0.1
|
| 30 |
+
- 0.2
|
| 31 |
+
- 0.3
|
| 32 |
+
- 0.4
|
| 33 |
+
- 0.5
|
| 34 |
+
- 0.6
|
| 35 |
+
- 0.7
|
| 36 |
+
- 0.8
|
| 37 |
+
- 0.9
|
| 38 |
+
- 1.0
|
| 39 |
+
used_compression_ratios:
|
| 40 |
+
- 0.2
|
| 41 |
+
- 0.2
|
| 42 |
+
- 0.2
|
| 43 |
+
- 0.2
|
| 44 |
+
- 0.2
|
| 45 |
+
- 1.0
|
| 46 |
+
training:
|
| 47 |
+
dynamic_ratio: true
|
| 48 |
+
ratio_schedule: uniform
|
| 49 |
+
warmup_steps: 5000
|
| 50 |
+
framework:
|
| 51 |
+
action_model:
|
| 52 |
+
action_dim: 7
|
| 53 |
+
action_horizon: 16
|
| 54 |
+
action_model_type: DiT-B
|
| 55 |
+
add_pos_embed: true
|
| 56 |
+
diffusion_model_cfg:
|
| 57 |
+
cross_attention_dim: 2560
|
| 58 |
+
dropout: 0.2
|
| 59 |
+
final_dropout: true
|
| 60 |
+
interleave_self_attention: true
|
| 61 |
+
norm_type: ada_norm
|
| 62 |
+
num_layers: 16
|
| 63 |
+
output_dim: 2560
|
| 64 |
+
positional_embeddings: null
|
| 65 |
+
future_action_window_size: 15
|
| 66 |
+
hidden_size: 1024
|
| 67 |
+
max_seq_len: 1024
|
| 68 |
+
noise_beta_alpha: 1.5
|
| 69 |
+
noise_beta_beta: 1.0
|
| 70 |
+
noise_s: 0.999
|
| 71 |
+
num_inference_timesteps: 4
|
| 72 |
+
num_target_vision_tokens: 32
|
| 73 |
+
num_timestep_buckets: 1000
|
| 74 |
+
past_action_window_size: 0
|
| 75 |
+
state_dim: 7
|
| 76 |
+
name: QwenGR00T
|
| 77 |
+
qwenvl:
|
| 78 |
+
base_vlm: FrameSkip_GR00T_bridge_qwen3/Qwen3-VL-4B-Instruct
|
| 79 |
+
template: qwen3_vl
|
| 80 |
+
run_root_dir: ./results/Checkpoints/FrameSkip/SimplerEnv
|
| 81 |
+
seed: 42
|
| 82 |
+
trainer:
|
| 83 |
+
eval_interval: 1000
|
| 84 |
+
freeze_modules: true
|
| 85 |
+
gradient_accumulation_steps: 1
|
| 86 |
+
gradient_clipping: 1.0
|
| 87 |
+
is_resume: true
|
| 88 |
+
learning_rate:
|
| 89 |
+
action_model: 0.0001
|
| 90 |
+
base: 1.0e-05
|
| 91 |
+
qwen_vl_interface: 1.0e-05
|
| 92 |
+
logging_frequency: 100
|
| 93 |
+
lr_scheduler_type: cosine_with_min_lr
|
| 94 |
+
max_train_steps: 100000
|
| 95 |
+
num_warmup_steps: 2000
|
| 96 |
+
optimizer:
|
| 97 |
+
betas:
|
| 98 |
+
- 0.9
|
| 99 |
+
- 0.95
|
| 100 |
+
eps: 1.0e-08
|
| 101 |
+
weight_decay: 1.0e-08
|
| 102 |
+
repeated_diffusion_steps: 4
|
| 103 |
+
save_interval: 10000
|
| 104 |
+
scheduler_specific_kwargs:
|
| 105 |
+
min_lr: 5.0e-07
|
| 106 |
+
wandb_entity: jinhuiye
|
| 107 |
+
wandb_project: starVLA
|
dataset_statistics.json
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"oxe_bridge": {
|
| 3 |
+
"action": {
|
| 4 |
+
"mean": [
|
| 5 |
+
0.0002273258287459612,
|
| 6 |
+
0.00013113691238686442,
|
| 7 |
+
-0.00012639156193472445,
|
| 8 |
+
-0.00014385415124706924,
|
| 9 |
+
-0.00039017299422994256,
|
| 10 |
+
0.00024080397270154208,
|
| 11 |
+
0.5765793323516846
|
| 12 |
+
],
|
| 13 |
+
"std": [
|
| 14 |
+
0.009770580567419529,
|
| 15 |
+
0.013695014640688896,
|
| 16 |
+
0.012675177305936813,
|
| 17 |
+
0.02845500037074089,
|
| 18 |
+
0.030521046370267868,
|
| 19 |
+
0.07739090174436569,
|
| 20 |
+
0.4966537058353424
|
| 21 |
+
],
|
| 22 |
+
"max": [
|
| 23 |
+
0.41691166162490845,
|
| 24 |
+
0.25864794850349426,
|
| 25 |
+
0.21218234300613403,
|
| 26 |
+
3.122201919555664,
|
| 27 |
+
1.8618112802505493,
|
| 28 |
+
6.272472858428955,
|
| 29 |
+
1.0
|
| 30 |
+
],
|
| 31 |
+
"min": [
|
| 32 |
+
-0.4007510244846344,
|
| 33 |
+
-0.13874775171279907,
|
| 34 |
+
-0.22553899884223938,
|
| 35 |
+
-3.2010786533355713,
|
| 36 |
+
-1.8618112802505493,
|
| 37 |
+
-6.279075622558594,
|
| 38 |
+
0.0
|
| 39 |
+
],
|
| 40 |
+
"q01": [
|
| 41 |
+
-0.028752606511116028,
|
| 42 |
+
-0.041702210046350954,
|
| 43 |
+
-0.026096698231995105,
|
| 44 |
+
-0.08052822157740593,
|
| 45 |
+
-0.09249736212193965,
|
| 46 |
+
-0.20738411962985992,
|
| 47 |
+
0.0
|
| 48 |
+
],
|
| 49 |
+
"q99": [
|
| 50 |
+
0.02830690816044803,
|
| 51 |
+
0.04089860741049051,
|
| 52 |
+
0.04018005654215808,
|
| 53 |
+
0.08173405691981314,
|
| 54 |
+
0.07760896608233431,
|
| 55 |
+
0.20384809583425495,
|
| 56 |
+
1.0
|
| 57 |
+
],
|
| 58 |
+
"mask": [
|
| 59 |
+
true,
|
| 60 |
+
true,
|
| 61 |
+
true,
|
| 62 |
+
true,
|
| 63 |
+
true,
|
| 64 |
+
true,
|
| 65 |
+
false
|
| 66 |
+
]
|
| 67 |
+
},
|
| 68 |
+
"state": {
|
| 69 |
+
"mean": [
|
| 70 |
+
0.3094329535961151,
|
| 71 |
+
0.030725397169589996,
|
| 72 |
+
0.06444206088781357,
|
| 73 |
+
0.006489328108727932,
|
| 74 |
+
-0.07720080018043518,
|
| 75 |
+
0.10767334699630737,
|
| 76 |
+
0.0,
|
| 77 |
+
0.7081261277198792
|
| 78 |
+
],
|
| 79 |
+
"std": [
|
| 80 |
+
0.06056813895702362,
|
| 81 |
+
0.09190769493579865,
|
| 82 |
+
0.05156221613287926,
|
| 83 |
+
0.13109272718429565,
|
| 84 |
+
0.16913393139839172,
|
| 85 |
+
0.5779843926429749,
|
| 86 |
+
0.0,
|
| 87 |
+
0.35254883766174316
|
| 88 |
+
],
|
| 89 |
+
"max": [
|
| 90 |
+
0.5862360596656799,
|
| 91 |
+
0.4034728705883026,
|
| 92 |
+
0.3568263053894043,
|
| 93 |
+
1.3517684936523438,
|
| 94 |
+
1.570796251296997,
|
| 95 |
+
3.141204357147217,
|
| 96 |
+
0.0,
|
| 97 |
+
1.1121242046356201
|
| 98 |
+
],
|
| 99 |
+
"min": [
|
| 100 |
+
-0.04167502000927925,
|
| 101 |
+
-0.3563207685947418,
|
| 102 |
+
-0.15537554025650024,
|
| 103 |
+
-3.141592502593994,
|
| 104 |
+
-1.4992541074752808,
|
| 105 |
+
-3.14153790473938,
|
| 106 |
+
0.0,
|
| 107 |
+
0.04637829214334488
|
| 108 |
+
],
|
| 109 |
+
"q01": [
|
| 110 |
+
0.17102580681443214,
|
| 111 |
+
-0.16981234937906264,
|
| 112 |
+
-0.05563282176852226,
|
| 113 |
+
-0.36493386059999466,
|
| 114 |
+
-0.5418747025728226,
|
| 115 |
+
-1.3542919230461121,
|
| 116 |
+
0.0,
|
| 117 |
+
0.052190229296684265
|
| 118 |
+
],
|
| 119 |
+
"q99": [
|
| 120 |
+
0.45322125554084775,
|
| 121 |
+
0.2354859386384485,
|
| 122 |
+
0.19489662453532214,
|
| 123 |
+
0.3779941478371616,
|
| 124 |
+
0.2756884342432019,
|
| 125 |
+
1.8500668883323654,
|
| 126 |
+
0.0,
|
| 127 |
+
1.0105689764022827
|
| 128 |
+
]
|
| 129 |
+
},
|
| 130 |
+
"num_transitions": 1305714,
|
| 131 |
+
"num_trajectories": 53192
|
| 132 |
+
}
|
| 133 |
+
}
|