Safetensors
English
ChenYi99 committed on
Commit
d3b0f6e
·
verified ·
1 Parent(s): 90cdace

Upload folder using huggingface_hub

Browse files
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # 1. Exclude first-level subdirectories whose names end with _wrong (the trailing / marks a directory)
2
+ /*_wrong/
3
+
4
+ # 2. Exclude second-level subdirectories whose names start with evaluation
5
+ /*/evaluation*/
DIAL-3B-fewshot/config.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 128,
3
+ "action_head_cfg": {
4
+ "action_dim": 128,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "correct_vl_mask": false,
9
+ "diffusion_model_cfg": {
10
+ "attention_head_dim": 48,
11
+ "cross_attention_dim": 2048,
12
+ "dropout": 0.2,
13
+ "final_dropout": true,
14
+ "interleave_self_attention": true,
15
+ "norm_type": "ada_norm",
16
+ "num_attention_heads": 32,
17
+ "num_layers": 16,
18
+ "output_dim": 1024,
19
+ "positional_embeddings": null
20
+ },
21
+ "hidden_size": 1024,
22
+ "input_embedding_dim": 1536,
23
+ "max_action_dim": 128,
24
+ "max_state_dim": 128,
25
+ "model_dtype": "float32",
26
+ "noise_beta_alpha": 1.5,
27
+ "noise_beta_beta": 1.0,
28
+ "noise_s": 0.999,
29
+ "num_inference_timesteps": 4,
30
+ "num_target_vision_tokens": 0,
31
+ "num_timestep_buckets": 1000,
32
+ "tune_diffusion_model": true,
33
+ "tune_projector": true,
34
+ "use_vl_mask": false,
35
+ "use_vlln": true,
36
+ "vl_self_attention_cfg": {
37
+ "attention_head_dim": 64,
38
+ "dropout": 0.2,
39
+ "final_dropout": true,
40
+ "num_attention_heads": 32,
41
+ "num_layers": 4,
42
+ "positional_embeddings": null
43
+ }
44
+ },
45
+ "action_horizon": 16,
46
+ "architectures": [
47
+ "GR00T_N1_5_DIAL"
48
+ ],
49
+ "attn_implementation": null,
50
+ "backbone_cfg": {
51
+ "load_bf16": false,
52
+ "project_to_dim": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 36,
55
+ "tune_all_llm_embedding": false,
56
+ "tune_bridge_embedding": true,
57
+ "tune_llm": true,
58
+ "tune_visual": false,
59
+ "use_flash_attention": true,
60
+ "vlm_path": "Qwen/Qwen2.5-VL-3B-Instruct"
61
+ },
62
+ "bridge_cfg": {
63
+ "action_only_one_obs": false,
64
+ "bridge_loss_decay_steps": null,
65
+ "bridge_loss_end_w": null,
66
+ "bridge_loss_type": "mse",
67
+ "bridge_type": "end2end",
68
+ "compute_bridge_loss": true,
69
+ "goal_image_type": "future",
70
+ "noise_tau": 0,
71
+ "num_bridge_tokens": 64,
72
+ "omit_image_type_embedding_for_goal": false,
73
+ "reweight_noise": false,
74
+ "tokenizer_len": 151729,
75
+ "tune_bridge_goal": false,
76
+ "tune_bridge_visual": false,
77
+ "tune_image_type_embedding": true,
78
+ "unified_embodiment_id": 24,
79
+ "use_bridge": true,
80
+ "use_image_type_embedding": true,
81
+ "use_separate_projector_for_loss": true
82
+ },
83
+ "compute_dtype": "bfloat16",
84
+ "hidden_size": 2048,
85
+ "ignore_lang_prefix": true,
86
+ "model_dtype": "float32",
87
+ "model_type": "gr00t_n1_5_dial",
88
+ "torch_dtype": "float32",
89
+ "transformers_version": "4.52.0",
90
+ "video_delta_indices": [
91
+ 0
92
+ ]
93
+ }
DIAL-3B-fewshot/experiment_cfg/metadata.json ADDED
The diff for this file is too large to render. See raw diff
 
DIAL-3B-fewshot/model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef4245d424bc0b0fd5be6e2376f731005ea2f6e671f5b351a41c90b51bf76b47
3
+ size 4972313024
DIAL-3B-fewshot/model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66cbc3dc6a4eb9dbf6bd7c87b485da71230b0c05c116d3d0228e76b26a4aea5a
3
+ size 4932954592
DIAL-3B-fewshot/model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4af0074f406239f95f4763423f310ec2c5276e8bcf5414b9edc99e02ca3b1665
3
+ size 4932954680
DIAL-3B-fewshot/model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe8e39ceca3753aabe113e72aca191b3b57d4c6083046b92de5d1f34fb7c8bbf
3
+ size 4993877584
DIAL-3B-fewshot/model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cda9532379532b982d26d02b08aa505dee6409b0fbad9056a323acd6b374fa1
3
+ size 2243381472
DIAL-3B-fewshot/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
DIAL-3B-fewshot/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
DIAL-3B-fulldata/config.json ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_dim": 128,
3
+ "action_head_cfg": {
4
+ "action_dim": 128,
5
+ "action_horizon": 16,
6
+ "add_pos_embed": true,
7
+ "backbone_embedding_dim": 2048,
8
+ "correct_vl_mask": false,
9
+ "diffusion_model_cfg": {
10
+ "attention_head_dim": 48,
11
+ "cross_attention_dim": 2048,
12
+ "dropout": 0.2,
13
+ "final_dropout": true,
14
+ "interleave_self_attention": true,
15
+ "norm_type": "ada_norm",
16
+ "num_attention_heads": 32,
17
+ "num_layers": 16,
18
+ "output_dim": 1024,
19
+ "positional_embeddings": null
20
+ },
21
+ "hidden_size": 1024,
22
+ "input_embedding_dim": 1536,
23
+ "max_action_dim": 128,
24
+ "max_state_dim": 128,
25
+ "model_dtype": "float32",
26
+ "noise_beta_alpha": 1.5,
27
+ "noise_beta_beta": 1.0,
28
+ "noise_s": 0.999,
29
+ "num_inference_timesteps": 4,
30
+ "num_target_vision_tokens": 0,
31
+ "num_timestep_buckets": 1000,
32
+ "tune_diffusion_model": true,
33
+ "tune_projector": true,
34
+ "use_vl_mask": false,
35
+ "use_vlln": true,
36
+ "vl_self_attention_cfg": {
37
+ "attention_head_dim": 64,
38
+ "dropout": 0.2,
39
+ "final_dropout": true,
40
+ "num_attention_heads": 32,
41
+ "num_layers": 4,
42
+ "positional_embeddings": null
43
+ }
44
+ },
45
+ "action_horizon": 16,
46
+ "architectures": [
47
+ "GR00T_N1_5_DIAL"
48
+ ],
49
+ "attn_implementation": null,
50
+ "backbone_cfg": {
51
+ "load_bf16": false,
52
+ "project_to_dim": null,
53
+ "reproject_vision": false,
54
+ "select_layer": 36,
55
+ "tune_all_llm_embedding": false,
56
+ "tune_bridge_embedding": true,
57
+ "tune_llm": true,
58
+ "tune_visual": false,
59
+ "use_flash_attention": true,
60
+ "vlm_path": "Qwen/Qwen2.5-VL-3B-Instruct"
61
+ },
62
+ "bridge_cfg": {
63
+ "action_only_one_obs": false,
64
+ "bridge_loss_decay_steps": null,
65
+ "bridge_loss_end_w": null,
66
+ "bridge_loss_type": "mse",
67
+ "bridge_type": "end2end",
68
+ "compute_bridge_loss": true,
69
+ "goal_image_type": "future",
70
+ "noise_tau": 0,
71
+ "num_bridge_tokens": 64,
72
+ "omit_image_type_embedding_for_goal": false,
73
+ "reweight_noise": false,
74
+ "tokenizer_len": 151729,
75
+ "tune_bridge_goal": false,
76
+ "tune_bridge_visual": false,
77
+ "tune_image_type_embedding": true,
78
+ "unified_embodiment_id": null,
79
+ "use_bridge": true,
80
+ "use_image_type_embedding": true,
81
+ "use_separate_projector_for_loss": true
82
+ },
83
+ "compute_dtype": "bfloat16",
84
+ "hidden_size": 2048,
85
+ "ignore_lang_prefix": true,
86
+ "model_dtype": "float32",
87
+ "model_type": "gr00t_n1_5_dial",
88
+ "torch_dtype": "float32",
89
+ "transformers_version": "4.52.0",
90
+ "video_delta_indices": [
91
+ 0
92
+ ]
93
+ }
DIAL-3B-fulldata/experiment_cfg/metadata.json ADDED
The diff for this file is too large to render. See raw diff
 
DIAL-3B-fulldata/model-00001-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e953ca78ef7420f59e66ed55be71c8e13386330fa019eb3a90ed8611e5e9b9a
3
+ size 4972313024
DIAL-3B-fulldata/model-00002-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75907124b1c46b02a5a0933757dccbe274e22e6eb617eaff2c1dbb9751511278
3
+ size 4932954592
DIAL-3B-fulldata/model-00003-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8561a67a505c4ec674760690bedc257ffbe8f0810e007109b48cc22e460e6809
3
+ size 4932954680
DIAL-3B-fulldata/model-00004-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:705baaa44572307e4068da7a814dbff170a7cb7ed4f45b43a3cd7e9d38e349fb
3
+ size 4993877584
DIAL-3B-fulldata/model-00005-of-00005.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:384d6d9e2e1019bfdf8a1ee998ae93bc50d189e7377f5b1b6587e8e7119238bf
3
+ size 2243381472
DIAL-3B-fulldata/model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
DIAL-3B-fulldata/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff