jaehyunkang committed on
Commit
9cb5bdf
·
0 Parent(s):

RLDX-1 Release

Browse files
.gitattributes ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ teaser.png filter=lfs diff=lfs merge=lfs -text
37
+ architecture.png filter=lfs diff=lfs merge=lfs -text
LICENSE.md ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RLWRLD Model License v1.0
2
+
3
+ ## 1. Definitions
4
+
5
+ "Licensor" means RLWRLD, INC. and its affiliates.
6
+
7
+ "Model" means the machine learning model, including learnt weights, parameters, configuration files, and documentation made available under this license.
8
+
9
+ "Derivative Model" means all (a) modifications to the Model, (b) works based on the Model, and (c) any other derivative works of the Model, including models fine-tuned from the Model.
10
+
11
+ "You" means an individual or legal entity exercising permissions granted by this license.
12
+
13
+ ## 2. License Grant
14
+
15
+ Subject to the terms and conditions of this license, Licensor grants to You a perpetual, worldwide, non-exclusive, royalty-free license to use, reproduce, prepare derivative works of, publicly display, publicly perform, and distribute the Model and any Derivative Models.
16
+
17
+ ## 3. Conditions and Limitations
18
+
19
+ **3.1 Non-Commercial Use.** The Model and any Derivative Models may only be used for non-commercial purposes. "Non-commercial" means for academic research, educational, personal, or evaluation purposes only, and does not include any use primarily intended for or directed toward commercial advantage or monetary compensation.
20
+
21
+ **3.2 Attribution.** You must give appropriate credit to Licensor, provide a link to this license, and indicate if changes were made. You must include the following attribution notice with any distribution of the Model or Derivative Model:
22
+
23
+ > "Licensed under the RLWRLD Model License v1.0"
24
+
25
+ **3.3 Share-Alike.** If You distribute a Derivative Model, You must do so under this same license, or another license that includes at minimum (a) a non-commercial use limitation no less restrictive than Section 3.1 and (b) a share-alike requirement no less restrictive than this Section 3.3.
26
+
27
+ **3.4 Redistribution.** You may distribute copies of the Model or Derivative Models provided that You (a) include a complete copy of this license, (b) retain all copyright, trademark, and attribution notices, and (c) comply with all conditions in this Section 3.
28
+
29
+ **3.5 Use Restrictions.** The Model and any Derivative Models shall not be used for: (a) military, weapons development, or defense applications; (b) surveillance or monitoring of individuals without their consent; or (c) any use that violates applicable laws or regulations.
30
+
31
+ **3.6 Trademarks.** This license does not grant any rights to use Licensor's names, logos, or trademarks, except as required for reasonable and customary use in describing the origin of the Model and reproducing the notices described in this license.
32
+
33
+ **3.7 Patent Claims.** If You or Your affiliate(s) bring or threaten to bring any claim or litigation (including any claim, cross-claim, or counterclaim in a lawsuit) against any entity to enforce any patents that You allege are infringed by the Model, then any rights granted to You under this license will terminate immediately.
34
+
35
+ **3.8 Termination.** If You violate any term of this license, Your rights under this license will terminate immediately.
36
+
37
+ ## 4. Third-Party Components
38
+
39
+ The Model may include or be distributed with third-party components that are subject to separate license terms and notices. Such components are subject to their respective licenses, including any notices and disclaimers contained therein. Licensor does not grant any rights with respect to third-party components beyond those provided under the applicable third-party licenses.
40
+
41
+ ## 5. Disclaimer of Warranty
42
+
43
+ THE MODEL IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR NONINFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. YOU ARE SOLELY RESPONSIBLE FOR DETERMINING THE APPROPRIATENESS OR LAWFULNESS OF USING OR REDISTRIBUTING THE MODEL, DERIVATIVE MODELS OR ANY OUTPUT OR RESULTS AND ASSUME ANY RISKS ASSOCIATED WITH YOUR USE OF THE MODEL, DERIVATIVE MODELS AND ANY OUTPUT AND RESULTS.
44
+
45
+ ## 6. Limitation of Liability
46
+
47
+ IN NO EVENT SHALL LICENSOR BE LIABLE TO YOU FOR ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE OR THE USE OR INABILITY TO USE THE MODEL, DERIVATIVE MODELS, OR ANY OUTPUTS THEREOF, EVEN IF LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
48
+
49
+ ## 7. Indemnity
50
+
51
+ You shall indemnify and hold harmless Licensor from and against any claim by any third party arising out of or related to Your use or distribution of the Model, Derivative Models, or any outputs thereof.
52
+
53
+ ## 8. Feedback
54
+
55
+ If You provide feedback, suggestions, or improvements regarding the Model, Licensor may use such feedback without restriction or compensation to You.
56
+
57
+ ## 9. General Provisions
58
+
59
+ **9.1 Governing Law.** This license will be governed by and construed in accordance with the laws of the State of Delaware, United States, without regard to its conflict of laws rules. The UN Convention on Contracts for International Sale of Goods does not apply to this license.
60
+
61
+ **9.2 License Updates.** Licensor may update this license to comply with legal and regulatory requirements at any time. You agree to either comply with any updated license or cease Your use and distribution of the Model and any Derivative Model.
README.md ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: other
3
+ license_name: rlwrld-model-license-v1.0
4
+ license_link: LICENSE.md
5
+ library_name: transformers
6
+ pipeline_tag: robotics
7
+ tags:
8
+ - robotics
9
+ - vla
10
+ - vision-language-action
11
+ - manipulation
12
+ - flow-matching
13
+ - rldx
14
+ - droid
15
+ base_model: RLWRLD/RLDX-1-PT
16
+ ---
17
+
18
+ # RLDX-1-MT-DROID
19
+
20
+ [Paper](https://arxiv.org/abs/2605.03269)  ·  [Project page](https://rlwrld.ai/rldx-1)  ·  [Code](https://github.com/RLWRLD/RLDX-1)  ·  [Models](https://huggingface.co/collections/RLWRLD/rldx-1)
21
+
22
+ <p align="center">
23
+ <img src="teaser.png" width="100%" alt="RLDX-1 teaser">
24
+ </p>
25
+
26
+ **RLDX-1** is a general-purpose Robot Foundation Model designed for dexterous
27
+ manipulation. Powered by a **Multi-Stream Action Transformer (MSAT)**, it
28
+ seamlessly unifies multimodal perception (visual + tactile), high-DoF
29
+ actuation, and memory-aware decision-making in a single architecture.
30
+
31
+ This repository hosts **`RLDX-1-MT-DROID`** — RLDX-1 **mid-trained** on the
32
+ [DROID](https://droid-dataset.github.io/) dataset (large-scale Franka-arm
33
+ teleoperation). Mid-training continues from the multi-source `RLDX-1-PT`
34
+ pretraining with an embodiment-specific corpus before downstream task
35
+ finetuning, making this checkpoint a stronger initialization than
36
+ `RLDX-1-PT` for any Franka-style downstream task.
37
+
38
+ ## Highlights
39
+
40
+ - **Multi-Stream Action Transformer (MSAT).** Cognition, physics, and
41
+ action each get a dedicated stream coupled by joint self-attention —
42
+ an extension of MM-DiT to action modeling.
43
+ - **Motion awareness.** Multi-frame observations + a motion module
44
+ capture temporal dynamics; intermediate VLM layers compress video
45
+ tokens to keep the policy efficient.
46
+ - **Long-term memory.** A memory module fuses past cognition features
47
+ with the current ones for history-grounded decisions beyond a short
48
+ multi-frame window.
49
+ - **Physical sensing.** Tactile and torque enter as a dedicated physics
50
+ stream; the decoder is jointly trained to predict future physical
51
+ signals.
52
+ - **Three-stage training.** Pre-training (generalization) → mid-training
53
+ (functionality) → post-training (task adaptation), with synthetic data
54
+ augmenting rare manipulation scenarios.
55
+ - **Real-time inference.** Static graph capture + custom fused kernels
56
+ bring the all-modality model to **43.7 ms / step on RTX 5090
57
+ (1.63× speedup, >22 Hz)**.
58
+
59
+ ## Quick start
60
+
61
+ ### Installation
62
+
63
+ ```bash
64
+ git clone https://github.com/RLWRLD/RLDX-1.git
65
+ cd RLDX-1
66
+ uv sync --python 3.10
67
+ uv pip install -e .
68
+ ```
69
+
70
+ ### Inference
71
+
72
+ ```python
73
+ from rldx.policy.rldx_policy import RLDXPolicy
74
+ from rldx.data.embodiment_tags import EmbodimentTag
75
+
76
+ policy = RLDXPolicy(
77
+ model_path="RLWRLD/RLDX-1-MT-DROID",
78
+ embodiment_tag=EmbodimentTag.OXE_DROID,
79
+ device="cuda:0",
80
+ )
81
+
82
+ action = policy.get_action(observation)
83
+ ```
84
+
85
+ ### Real-time serving (ZeroMQ)
86
+
87
+ ```bash
88
+ uv run python rldx/eval/run_rldx_server.py \
89
+ --model-path RLWRLD/RLDX-1-MT-DROID \
90
+ --embodiment-tag OXE_DROID \
91
+ --host 0.0.0.0 --port 20000
92
+ ```
93
+
94
+ ### Finetune from this checkpoint
95
+
96
+ ```bash
97
+ uv run python rldx/experiment/launch_train.py \
98
+ --base-model-path RLWRLD/RLDX-1-MT-DROID \
99
+ --dataset-path /path/to/your/dataset \
100
+ --embodiment-tag OXE_DROID \
101
+ --video-length 4 --n-cog-tokens 64 \
102
+ --global-batch-size 64 --learning-rate 1e-4 \
103
+ --max-steps 60000 --output-dir ./outputs/my_finetune
104
+ ```
105
+
106
+ For a full finetune walkthrough see
107
+ [`docs/training.md`](https://github.com/RLWRLD/RLDX-1/blob/main/docs/training.md).
108
+
109
+ ## Model details
110
+
111
+ - **Architecture:** Multi-Stream Action Transformer (MSAT) policy on a
112
+ Qwen3-VL backbone with cognition-token perceptual summary. Trained with
113
+ flow matching.
114
+ - **Inputs:** RGB video (default 4 frames), state proprioception, language
115
+ instruction.
116
+ - **Outputs:** Action chunks of length 16.
117
+ - **Embodiment tag:** `OXE_DROID`.
118
+ - **Base model:** [`RLWRLD/RLDX-1-PT`](https://huggingface.co/RLWRLD/RLDX-1-PT).
119
+ - **Backbone:** [`Qwen/Qwen3-VL-8B-Instruct`](https://huggingface.co/Qwen/Qwen3-VL-8B-Instruct).
120
+ - **Mid-train data:** DROID.
121
+ - **Params:** 8.1B.
122
+
123
+ For the full architectural walkthrough see
124
+ [`docs/architecture.md`](https://github.com/RLWRLD/RLDX-1/blob/main/docs/architecture.md).
125
+
126
+ ## RLDX-1 model family
127
+
128
+ | Checkpoint | Description |
129
+ |---|---|
130
+ | [`RLDX-1-PT`](https://huggingface.co/RLWRLD/RLDX-1-PT) | Multi-source pretrained foundation |
131
+ | [`RLDX-1-VLM`](https://huggingface.co/RLWRLD/RLDX-1-VLM) | Qwen3-VL-8B vision-language backbone |
132
+ | [`RLDX-1-FT-ROBOCASA`](https://huggingface.co/RLWRLD/RLDX-1-FT-ROBOCASA) | RoboCasa Kitchen 24-task finetune |
133
+ | [`RLDX-1-FT-RC365`](https://huggingface.co/RLWRLD/RLDX-1-FT-RC365) | RoboCasa-365 cross-task finetune |
134
+ | [`RLDX-1-FT-LIBERO`](https://huggingface.co/RLWRLD/RLDX-1-FT-LIBERO) | LIBERO 4-task suite (goal, object, spatial, long) finetune |
135
+ | [`RLDX-1-FT-SIMPLER-GOOGLE`](https://huggingface.co/RLWRLD/RLDX-1-FT-SIMPLER-GOOGLE) | SIMPLER Google VM/VA finetune |
136
+ | [`RLDX-1-FT-SIMPLER-WIDOWX`](https://huggingface.co/RLWRLD/RLDX-1-FT-SIMPLER-WIDOWX) | SIMPLER WidowX finetune |
137
+ | [`RLDX-1-FT-GR1`](https://huggingface.co/RLWRLD/RLDX-1-FT-GR1) | GR-1 Tabletop finetune |
138
+ | [`RLDX-1-MT-DROID`](https://huggingface.co/RLWRLD/RLDX-1-MT-DROID) | DROID mid-train (this repo) |
139
+ | [`RLDX-1-MT-ALLEX`](https://huggingface.co/RLWRLD/RLDX-1-MT-ALLEX) | All add-ons (memory + motion + physics + video) |
140
+
141
+ ## Intended use & limitations
142
+
143
+ **Intended use.** As a strong initialization for downstream finetuning on
144
+ Franka-arm manipulation tasks; research on robotic manipulation; and
145
+ non-commercial real-robot deployment under the conditions of the RLWRLD
146
+ Model License v1.0.
147
+
148
+ **Out of scope.** Commercial deployment, military or weapons applications,
149
+ non-consensual surveillance, and any use that violates applicable laws or
150
+ regulations. See [`LICENSE.md`](LICENSE.md) §3.1 and §3.5 for the full terms.
151
+
152
+ **Limitations.** Mid-train conditioning is most useful for Franka-style
153
+ embodiments. For very different morphologies (humanoid, dual-arm, mobile),
154
+ [`RLDX-1-PT`](https://huggingface.co/RLWRLD/RLDX-1-PT) or
155
+ [`RLDX-1-MT-ALLEX`](https://huggingface.co/RLWRLD/RLDX-1-MT-ALLEX) may be
156
+ better starting points. The memory, motion, and physics modules are
157
+ inactive in this checkpoint — enable them at finetune time if needed.
158
+
159
+ ## Citation
160
+
161
+ ```bibtex
162
+ @misc{rldx2026,
163
+ title={RLDX-1 Technical Report},
164
+ author={Kim, Dongyoung and Jang, Huiwon and Koo, Myungkyu and Jang, Suhyeok and Kim, Taeyoung and others},
165
+ year={2026},
166
+ note={RLWRLD},
167
+ eprint={2605.03269},
168
+ archivePrefix={arXiv},
169
+ url={https://arxiv.org/abs/2605.03269}
170
+ }
171
+ ```
172
+
173
+ ## License
174
+
175
+ Released under the **RLWRLD Model License v1.0** — a non-commercial license
176
+ with attribution and share-alike requirements. See [`LICENSE.md`](LICENSE.md) for
177
+ the full text. By using this model you agree to those terms, including the
178
+ use restrictions in §3.5.
architecture.png ADDED

Git LFS Details

  • SHA256: 8d0e305139502965d4289446add15e9e11c34dcc8106ad526fa8c957c12595d3
  • Pointer size: 132 Bytes
  • Size of remote file: 1.09 MB
config.json ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "action_model_lora_alpha": 32,
3
+ "action_model_lora_dropout": 0.0,
4
+ "action_model_lora_rank": 16,
5
+ "action_model_lora_target_modules": [
6
+ "vl_qkv",
7
+ "vl_proj",
8
+ "sa_qkv",
9
+ "sa_proj",
10
+ "p_qkv",
11
+ "p_proj",
12
+ "linear1",
13
+ "linear2"
14
+ ],
15
+ "action_model_use_lora": false,
16
+ "action_horizon": 16,
17
+ "allow_missing_physics": true,
18
+ "architectures": [
19
+ "RLDX"
20
+ ],
21
+ "backbone_model_type": "vtc_qwen3_vl",
22
+ "backbone_trainable_params_fp32": true,
23
+ "color_jitter_params": {
24
+ "brightness": 0.3,
25
+ "contrast": 0.4,
26
+ "hue": 0.08,
27
+ "saturation": 0.5
28
+ },
29
+ "concat_memory": true,
30
+ "conversation_image_first": false,
31
+ "diffusion_model_cfg": {
32
+ "action_model_max_seq_len": 512,
33
+ "attention_head_dim": 64,
34
+ "depth_multi_stream": 4,
35
+ "depth_single_stream": 8,
36
+ "dropout": 0.2,
37
+ "final_dropout": true,
38
+ "num_attention_heads": 24,
39
+ "output_dim": 1024,
40
+ "physics_dim": 22,
41
+ "positional_embeddings": "rope_sa_only",
42
+ "pre_norm": "layer_norm",
43
+ "qk_norm": "rms_norm",
44
+ "rope_theta": 10000.0,
45
+ "sa_dim": 1536,
46
+ "temb_type": "input_token",
47
+ "use_physics": true,
48
+ "use_swiglu": true,
49
+ "vl_dim": 4096
50
+ },
51
+ "dtype": "bfloat16",
52
+ "freeze_cog_tokens": false,
53
+ "load_bf16": true,
54
+ "memory_cfg": {
55
+ "hidden_size": 4096,
56
+ "intermediate_size": 16384,
57
+ "max_position_embeddings": 32,
58
+ "num_attention_heads": 16,
59
+ "num_hidden_layers": 2,
60
+ "num_key_value_heads": 16,
61
+ "rms_norm_eps": 1e-05,
62
+ "use_causal_attn": true,
63
+ "use_rope": true
64
+ },
65
+ "memory_dropout_prob": 0.3,
66
+ "memory_length": 4,
67
+ "memory_n_cog_tokens": 16,
68
+ "memory_video_delta_indices": [
69
+ -48,
70
+ -32,
71
+ -16,
72
+ 0
73
+ ],
74
+ "model_name": "RLWRLD/RLDX-1-VLM",
75
+ "model_type": "RLDX-1",
76
+ "motion_drop": true,
77
+ "use_motion": true,
78
+ "motion_gradient_check": false,
79
+ "motion_injection_point": "vision_encoder",
80
+ "motion_insert_layer": 9,
81
+ "motion_pool_type": "avg",
82
+ "n_cog_tokens": 64,
83
+ "general_embodiment_train_ratio": 0,
84
+ "physics_delta_indices": [
85
+ 0,
86
+ 1,
87
+ 2,
88
+ 3,
89
+ 4,
90
+ 5,
91
+ 6,
92
+ 7,
93
+ 8,
94
+ 9,
95
+ 10,
96
+ 11,
97
+ 12,
98
+ 13,
99
+ 14,
100
+ 15,
101
+ 16
102
+ ],
103
+ "physics_dims": [
104
+ 15,
105
+ 7
106
+ ],
107
+ "physics_dropout_prob": 0.3,
108
+ "physics_keys": [
109
+ "tactile",
110
+ "torque"
111
+ ],
112
+ "physics_loss_weight": 0.1,
113
+ "physics_use_flow_matching": true,
114
+ "qwen3_collator": true,
115
+ "random_rotation_angle": null,
116
+ "reproject_vision": false,
117
+ "state_dropout_prob": 0.3,
118
+ "transformers_version": "4.57.0",
119
+ "tune_diffusion_model": true,
120
+ "tune_llm": false,
121
+ "tune_projector": true,
122
+ "tune_top_llm_layers": 4,
123
+ "tune_visual": false,
124
+ "use_memory": true,
125
+ "use_physics": true,
126
+ "use_relative_action": true,
127
+ "use_video": true,
128
+ "video_length": 4
129
+ }
embodiment_id.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "general_embodiment": 0,
3
+ "fractal20220817_data": 1,
4
+ "kuka": 2,
5
+ "bridge_orig": 3,
6
+ "taco_play": 4,
7
+ "jaco_play": 5,
8
+ "berkeley_cable_routing": 6,
9
+ "roboturk": 7,
10
+ "viola": 8,
11
+ "berkeley_autolab_ur5": 9,
12
+ "toto": 10,
13
+ "language_table": 11,
14
+ "stanford_hydra_dataset_converted_externally_to_rlds": 12,
15
+ "austin_buds_dataset_converted_externally_to_rlds": 13,
16
+ "nyu_franka_play_dataset_converted_externally_to_rlds": 14,
17
+ "furniture_bench_dataset_converted_externally_to_rlds": 15,
18
+ "ucsd_kitchen_dataset_converted_externally_to_rlds": 16,
19
+ "austin_sailor_dataset_converted_externally_to_rlds": 17,
20
+ "austin_sirius_dataset_converted_externally_to_rlds": 18,
21
+ "dlr_edan_shared_control_converted_externally_to_rlds": 19,
22
+ "iamlab_cmu_pickup_insert_converted_externally_to_rlds": 20,
23
+ "utaustin_mutex": 21,
24
+ "berkeley_fanuc_manipulation": 22,
25
+ "cmu_stretch": 23,
26
+ "bc_z": 24,
27
+ "fmb_dataset": 25,
28
+ "dobbe": 26,
29
+ "droid": 27,
30
+ "agibot_dexhand": 28,
31
+ "agibot_gripper": 29,
32
+ "galaxea": 30,
33
+ "humanoid_everyday_g1": 31,
34
+ "humanoid_everyday_h1": 32,
35
+ "action_net": 33,
36
+ "neural_gr1": 34,
37
+ "new_embodiment": 35
38
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce146f17f2a904de80661c3867044694668f0621fec122d35c87c50603b2fd3
3
+ size 4916790112
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9a618705ed94eaa44a0315a7af89a444c95b02cb84cd7270193ad4ca0c333ee
3
+ size 4446192352
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc8b50a8613bde56ec84d58eabda0da50540188995362fb6ca2f80e5fef61852
3
+ size 4949234600
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e17ec3ac95cb844150b630bd411a30665851906d33dbd23de27669b1458b08c
3
+ size 1819284516
model.safetensors.index.json ADDED
@@ -0,0 +1,890 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_parameters": 8065690582,
4
+ "total_size": 16131501444
5
+ },
6
+ "weight_map": {
7
+ "backbone.cog_emb": "model-00001-of-00004.safetensors",
8
+ "backbone.qwen_model.model.language_model.embed_tokens.weight": "model-00001-of-00004.safetensors",
9
+ "backbone.qwen_model.model.language_model.layers.0.layer.input_layernorm.weight": "model-00001-of-00004.safetensors",
10
+ "backbone.qwen_model.model.language_model.layers.0.layer.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
11
+ "backbone.qwen_model.model.language_model.layers.0.layer.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
12
+ "backbone.qwen_model.model.language_model.layers.0.layer.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
13
+ "backbone.qwen_model.model.language_model.layers.0.layer.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
14
+ "backbone.qwen_model.model.language_model.layers.0.layer.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
15
+ "backbone.qwen_model.model.language_model.layers.0.layer.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
16
+ "backbone.qwen_model.model.language_model.layers.0.layer.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
17
+ "backbone.qwen_model.model.language_model.layers.0.layer.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
18
+ "backbone.qwen_model.model.language_model.layers.0.layer.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
19
+ "backbone.qwen_model.model.language_model.layers.0.layer.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
20
+ "backbone.qwen_model.model.language_model.layers.1.layer.input_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "backbone.qwen_model.model.language_model.layers.1.layer.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
22
+ "backbone.qwen_model.model.language_model.layers.1.layer.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
23
+ "backbone.qwen_model.model.language_model.layers.1.layer.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
24
+ "backbone.qwen_model.model.language_model.layers.1.layer.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
25
+ "backbone.qwen_model.model.language_model.layers.1.layer.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
26
+ "backbone.qwen_model.model.language_model.layers.1.layer.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
27
+ "backbone.qwen_model.model.language_model.layers.1.layer.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
28
+ "backbone.qwen_model.model.language_model.layers.1.layer.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
29
+ "backbone.qwen_model.model.language_model.layers.1.layer.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
30
+ "backbone.qwen_model.model.language_model.layers.1.layer.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
31
+ "backbone.qwen_model.model.language_model.layers.2.layer.input_layernorm.weight": "model-00001-of-00004.safetensors",
32
+ "backbone.qwen_model.model.language_model.layers.2.layer.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
33
+ "backbone.qwen_model.model.language_model.layers.2.layer.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
34
+ "backbone.qwen_model.model.language_model.layers.2.layer.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
35
+ "backbone.qwen_model.model.language_model.layers.2.layer.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
36
+ "backbone.qwen_model.model.language_model.layers.2.layer.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
37
+ "backbone.qwen_model.model.language_model.layers.2.layer.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
38
+ "backbone.qwen_model.model.language_model.layers.2.layer.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
39
+ "backbone.qwen_model.model.language_model.layers.2.layer.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
40
+ "backbone.qwen_model.model.language_model.layers.2.layer.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
41
+ "backbone.qwen_model.model.language_model.layers.2.layer.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
42
+ "backbone.qwen_model.model.language_model.layers.3.layer.input_layernorm.weight": "model-00001-of-00004.safetensors",
43
+ "backbone.qwen_model.model.language_model.layers.3.layer.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
44
+ "backbone.qwen_model.model.language_model.layers.3.layer.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
45
+ "backbone.qwen_model.model.language_model.layers.3.layer.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
46
+ "backbone.qwen_model.model.language_model.layers.3.layer.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
47
+ "backbone.qwen_model.model.language_model.layers.3.layer.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
48
+ "backbone.qwen_model.model.language_model.layers.3.layer.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
49
+ "backbone.qwen_model.model.language_model.layers.3.layer.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
50
+ "backbone.qwen_model.model.language_model.layers.3.layer.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
51
+ "backbone.qwen_model.model.language_model.layers.3.layer.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
52
+ "backbone.qwen_model.model.language_model.layers.3.layer.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
53
+ "backbone.qwen_model.model.language_model.layers.4.layer.input_layernorm.weight": "model-00001-of-00004.safetensors",
54
+ "backbone.qwen_model.model.language_model.layers.4.layer.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
55
+ "backbone.qwen_model.model.language_model.layers.4.layer.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
56
+ "backbone.qwen_model.model.language_model.layers.4.layer.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
57
+ "backbone.qwen_model.model.language_model.layers.4.layer.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
58
+ "backbone.qwen_model.model.language_model.layers.4.layer.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
59
+ "backbone.qwen_model.model.language_model.layers.4.layer.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
60
+ "backbone.qwen_model.model.language_model.layers.4.layer.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
61
+ "backbone.qwen_model.model.language_model.layers.4.layer.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
62
+ "backbone.qwen_model.model.language_model.layers.4.layer.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
63
+ "backbone.qwen_model.model.language_model.layers.4.layer.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
64
+ "backbone.qwen_model.model.language_model.layers.5.layer.input_layernorm.weight": "model-00001-of-00004.safetensors",
65
+ "backbone.qwen_model.model.language_model.layers.5.layer.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
66
+ "backbone.qwen_model.model.language_model.layers.5.layer.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
67
+ "backbone.qwen_model.model.language_model.layers.5.layer.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
68
+ "backbone.qwen_model.model.language_model.layers.5.layer.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
69
+ "backbone.qwen_model.model.language_model.layers.5.layer.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
70
+ "backbone.qwen_model.model.language_model.layers.5.layer.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
71
+ "backbone.qwen_model.model.language_model.layers.5.layer.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
72
+ "backbone.qwen_model.model.language_model.layers.5.layer.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
73
+ "backbone.qwen_model.model.language_model.layers.5.layer.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
74
+ "backbone.qwen_model.model.language_model.layers.5.layer.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
75
+ "backbone.qwen_model.model.language_model.layers.6.layer.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
76
+ "backbone.qwen_model.model.language_model.layers.6.layer.self_attn.k_norm.weight": "model-00001-of-00004.safetensors",
77
+ "backbone.qwen_model.model.language_model.layers.6.layer.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
78
+ "backbone.qwen_model.model.language_model.layers.6.layer.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
79
+ "backbone.qwen_model.model.language_model.layers.6.layer.self_attn.q_norm.weight": "model-00001-of-00004.safetensors",
80
+ "backbone.qwen_model.model.language_model.layers.6.layer.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
81
+ "backbone.qwen_model.model.language_model.layers.6.layer.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
82
+ "backbone.qwen_model.model.visual.blocks.0.attn.proj.bias": "model-00001-of-00004.safetensors",
83
+ "backbone.qwen_model.model.visual.blocks.0.attn.proj.weight": "model-00001-of-00004.safetensors",
84
+ "backbone.qwen_model.model.visual.blocks.0.attn.qkv.bias": "model-00001-of-00004.safetensors",
85
+ "backbone.qwen_model.model.visual.blocks.0.attn.qkv.weight": "model-00001-of-00004.safetensors",
86
+ "backbone.qwen_model.model.visual.blocks.0.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
87
+ "backbone.qwen_model.model.visual.blocks.0.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
88
+ "backbone.qwen_model.model.visual.blocks.0.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
89
+ "backbone.qwen_model.model.visual.blocks.0.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
90
+ "backbone.qwen_model.model.visual.blocks.0.norm1.bias": "model-00001-of-00004.safetensors",
91
+ "backbone.qwen_model.model.visual.blocks.0.norm1.weight": "model-00001-of-00004.safetensors",
92
+ "backbone.qwen_model.model.visual.blocks.0.norm2.bias": "model-00001-of-00004.safetensors",
93
+ "backbone.qwen_model.model.visual.blocks.0.norm2.weight": "model-00001-of-00004.safetensors",
94
+ "backbone.qwen_model.model.visual.blocks.1.attn.proj.bias": "model-00001-of-00004.safetensors",
95
+ "backbone.qwen_model.model.visual.blocks.1.attn.proj.weight": "model-00001-of-00004.safetensors",
96
+ "backbone.qwen_model.model.visual.blocks.1.attn.qkv.bias": "model-00001-of-00004.safetensors",
97
+ "backbone.qwen_model.model.visual.blocks.1.attn.qkv.weight": "model-00001-of-00004.safetensors",
98
+ "backbone.qwen_model.model.visual.blocks.1.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
99
+ "backbone.qwen_model.model.visual.blocks.1.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
100
+ "backbone.qwen_model.model.visual.blocks.1.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
101
+ "backbone.qwen_model.model.visual.blocks.1.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
102
+ "backbone.qwen_model.model.visual.blocks.1.norm1.bias": "model-00001-of-00004.safetensors",
103
+ "backbone.qwen_model.model.visual.blocks.1.norm1.weight": "model-00001-of-00004.safetensors",
104
+ "backbone.qwen_model.model.visual.blocks.1.norm2.bias": "model-00001-of-00004.safetensors",
105
+ "backbone.qwen_model.model.visual.blocks.1.norm2.weight": "model-00001-of-00004.safetensors",
106
+ "backbone.qwen_model.model.visual.blocks.10.attn.proj.bias": "model-00001-of-00004.safetensors",
107
+ "backbone.qwen_model.model.visual.blocks.10.attn.proj.weight": "model-00001-of-00004.safetensors",
108
+ "backbone.qwen_model.model.visual.blocks.10.attn.qkv.bias": "model-00001-of-00004.safetensors",
109
+ "backbone.qwen_model.model.visual.blocks.10.attn.qkv.weight": "model-00001-of-00004.safetensors",
110
+ "backbone.qwen_model.model.visual.blocks.10.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
111
+ "backbone.qwen_model.model.visual.blocks.10.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
112
+ "backbone.qwen_model.model.visual.blocks.10.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
113
+ "backbone.qwen_model.model.visual.blocks.10.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
114
+ "backbone.qwen_model.model.visual.blocks.10.norm1.bias": "model-00001-of-00004.safetensors",
115
+ "backbone.qwen_model.model.visual.blocks.10.norm1.weight": "model-00001-of-00004.safetensors",
116
+ "backbone.qwen_model.model.visual.blocks.10.norm2.bias": "model-00001-of-00004.safetensors",
117
+ "backbone.qwen_model.model.visual.blocks.10.norm2.weight": "model-00001-of-00004.safetensors",
118
+ "backbone.qwen_model.model.visual.blocks.11.attn.proj.bias": "model-00001-of-00004.safetensors",
119
+ "backbone.qwen_model.model.visual.blocks.11.attn.proj.weight": "model-00001-of-00004.safetensors",
120
+ "backbone.qwen_model.model.visual.blocks.11.attn.qkv.bias": "model-00001-of-00004.safetensors",
121
+ "backbone.qwen_model.model.visual.blocks.11.attn.qkv.weight": "model-00001-of-00004.safetensors",
122
+ "backbone.qwen_model.model.visual.blocks.11.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
123
+ "backbone.qwen_model.model.visual.blocks.11.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
124
+ "backbone.qwen_model.model.visual.blocks.11.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
125
+ "backbone.qwen_model.model.visual.blocks.11.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
126
+ "backbone.qwen_model.model.visual.blocks.11.norm1.bias": "model-00001-of-00004.safetensors",
127
+ "backbone.qwen_model.model.visual.blocks.11.norm1.weight": "model-00001-of-00004.safetensors",
128
+ "backbone.qwen_model.model.visual.blocks.11.norm2.bias": "model-00001-of-00004.safetensors",
129
+ "backbone.qwen_model.model.visual.blocks.11.norm2.weight": "model-00001-of-00004.safetensors",
130
+ "backbone.qwen_model.model.visual.blocks.12.attn.proj.bias": "model-00001-of-00004.safetensors",
131
+ "backbone.qwen_model.model.visual.blocks.12.attn.proj.weight": "model-00001-of-00004.safetensors",
132
+ "backbone.qwen_model.model.visual.blocks.12.attn.qkv.bias": "model-00001-of-00004.safetensors",
133
+ "backbone.qwen_model.model.visual.blocks.12.attn.qkv.weight": "model-00001-of-00004.safetensors",
134
+ "backbone.qwen_model.model.visual.blocks.12.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
135
+ "backbone.qwen_model.model.visual.blocks.12.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
136
+ "backbone.qwen_model.model.visual.blocks.12.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
137
+ "backbone.qwen_model.model.visual.blocks.12.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
138
+ "backbone.qwen_model.model.visual.blocks.12.norm1.bias": "model-00001-of-00004.safetensors",
139
+ "backbone.qwen_model.model.visual.blocks.12.norm1.weight": "model-00001-of-00004.safetensors",
140
+ "backbone.qwen_model.model.visual.blocks.12.norm2.bias": "model-00001-of-00004.safetensors",
141
+ "backbone.qwen_model.model.visual.blocks.12.norm2.weight": "model-00001-of-00004.safetensors",
142
+ "backbone.qwen_model.model.visual.blocks.13.attn.proj.bias": "model-00001-of-00004.safetensors",
143
+ "backbone.qwen_model.model.visual.blocks.13.attn.proj.weight": "model-00001-of-00004.safetensors",
144
+ "backbone.qwen_model.model.visual.blocks.13.attn.qkv.bias": "model-00001-of-00004.safetensors",
145
+ "backbone.qwen_model.model.visual.blocks.13.attn.qkv.weight": "model-00001-of-00004.safetensors",
146
+ "backbone.qwen_model.model.visual.blocks.13.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
147
+ "backbone.qwen_model.model.visual.blocks.13.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
148
+ "backbone.qwen_model.model.visual.blocks.13.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
149
+ "backbone.qwen_model.model.visual.blocks.13.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
150
+ "backbone.qwen_model.model.visual.blocks.13.norm1.bias": "model-00001-of-00004.safetensors",
151
+ "backbone.qwen_model.model.visual.blocks.13.norm1.weight": "model-00001-of-00004.safetensors",
152
+ "backbone.qwen_model.model.visual.blocks.13.norm2.bias": "model-00001-of-00004.safetensors",
153
+ "backbone.qwen_model.model.visual.blocks.13.norm2.weight": "model-00001-of-00004.safetensors",
154
+ "backbone.qwen_model.model.visual.blocks.14.attn.proj.bias": "model-00001-of-00004.safetensors",
155
+ "backbone.qwen_model.model.visual.blocks.14.attn.proj.weight": "model-00001-of-00004.safetensors",
156
+ "backbone.qwen_model.model.visual.blocks.14.attn.qkv.bias": "model-00001-of-00004.safetensors",
157
+ "backbone.qwen_model.model.visual.blocks.14.attn.qkv.weight": "model-00001-of-00004.safetensors",
158
+ "backbone.qwen_model.model.visual.blocks.14.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
159
+ "backbone.qwen_model.model.visual.blocks.14.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
160
+ "backbone.qwen_model.model.visual.blocks.14.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
161
+ "backbone.qwen_model.model.visual.blocks.14.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
162
+ "backbone.qwen_model.model.visual.blocks.14.norm1.bias": "model-00001-of-00004.safetensors",
163
+ "backbone.qwen_model.model.visual.blocks.14.norm1.weight": "model-00001-of-00004.safetensors",
164
+ "backbone.qwen_model.model.visual.blocks.14.norm2.bias": "model-00001-of-00004.safetensors",
165
+ "backbone.qwen_model.model.visual.blocks.14.norm2.weight": "model-00001-of-00004.safetensors",
166
+ "backbone.qwen_model.model.visual.blocks.15.attn.proj.bias": "model-00001-of-00004.safetensors",
167
+ "backbone.qwen_model.model.visual.blocks.15.attn.proj.weight": "model-00001-of-00004.safetensors",
168
+ "backbone.qwen_model.model.visual.blocks.15.attn.qkv.bias": "model-00001-of-00004.safetensors",
169
+ "backbone.qwen_model.model.visual.blocks.15.attn.qkv.weight": "model-00001-of-00004.safetensors",
170
+ "backbone.qwen_model.model.visual.blocks.15.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
171
+ "backbone.qwen_model.model.visual.blocks.15.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
172
+ "backbone.qwen_model.model.visual.blocks.15.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
173
+ "backbone.qwen_model.model.visual.blocks.15.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
174
+ "backbone.qwen_model.model.visual.blocks.15.norm1.bias": "model-00001-of-00004.safetensors",
175
+ "backbone.qwen_model.model.visual.blocks.15.norm1.weight": "model-00001-of-00004.safetensors",
176
+ "backbone.qwen_model.model.visual.blocks.15.norm2.bias": "model-00001-of-00004.safetensors",
177
+ "backbone.qwen_model.model.visual.blocks.15.norm2.weight": "model-00001-of-00004.safetensors",
178
+ "backbone.qwen_model.model.visual.blocks.16.attn.proj.bias": "model-00001-of-00004.safetensors",
179
+ "backbone.qwen_model.model.visual.blocks.16.attn.proj.weight": "model-00001-of-00004.safetensors",
180
+ "backbone.qwen_model.model.visual.blocks.16.attn.qkv.bias": "model-00001-of-00004.safetensors",
181
+ "backbone.qwen_model.model.visual.blocks.16.attn.qkv.weight": "model-00001-of-00004.safetensors",
182
+ "backbone.qwen_model.model.visual.blocks.16.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
183
+ "backbone.qwen_model.model.visual.blocks.16.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
184
+ "backbone.qwen_model.model.visual.blocks.16.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
185
+ "backbone.qwen_model.model.visual.blocks.16.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
186
+ "backbone.qwen_model.model.visual.blocks.16.norm1.bias": "model-00001-of-00004.safetensors",
187
+ "backbone.qwen_model.model.visual.blocks.16.norm1.weight": "model-00001-of-00004.safetensors",
188
+ "backbone.qwen_model.model.visual.blocks.16.norm2.bias": "model-00001-of-00004.safetensors",
189
+ "backbone.qwen_model.model.visual.blocks.16.norm2.weight": "model-00001-of-00004.safetensors",
190
+ "backbone.qwen_model.model.visual.blocks.17.attn.proj.bias": "model-00001-of-00004.safetensors",
191
+ "backbone.qwen_model.model.visual.blocks.17.attn.proj.weight": "model-00001-of-00004.safetensors",
192
+ "backbone.qwen_model.model.visual.blocks.17.attn.qkv.bias": "model-00001-of-00004.safetensors",
193
+ "backbone.qwen_model.model.visual.blocks.17.attn.qkv.weight": "model-00001-of-00004.safetensors",
194
+ "backbone.qwen_model.model.visual.blocks.17.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
195
+ "backbone.qwen_model.model.visual.blocks.17.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
196
+ "backbone.qwen_model.model.visual.blocks.17.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
197
+ "backbone.qwen_model.model.visual.blocks.17.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
198
+ "backbone.qwen_model.model.visual.blocks.17.norm1.bias": "model-00001-of-00004.safetensors",
199
+ "backbone.qwen_model.model.visual.blocks.17.norm1.weight": "model-00001-of-00004.safetensors",
200
+ "backbone.qwen_model.model.visual.blocks.17.norm2.bias": "model-00001-of-00004.safetensors",
201
+ "backbone.qwen_model.model.visual.blocks.17.norm2.weight": "model-00001-of-00004.safetensors",
202
+ "backbone.qwen_model.model.visual.blocks.18.attn.proj.bias": "model-00001-of-00004.safetensors",
203
+ "backbone.qwen_model.model.visual.blocks.18.attn.proj.weight": "model-00001-of-00004.safetensors",
204
+ "backbone.qwen_model.model.visual.blocks.18.attn.qkv.bias": "model-00001-of-00004.safetensors",
205
+ "backbone.qwen_model.model.visual.blocks.18.attn.qkv.weight": "model-00001-of-00004.safetensors",
206
+ "backbone.qwen_model.model.visual.blocks.18.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
207
+ "backbone.qwen_model.model.visual.blocks.18.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
208
+ "backbone.qwen_model.model.visual.blocks.18.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
209
+ "backbone.qwen_model.model.visual.blocks.18.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
210
+ "backbone.qwen_model.model.visual.blocks.18.norm1.bias": "model-00001-of-00004.safetensors",
211
+ "backbone.qwen_model.model.visual.blocks.18.norm1.weight": "model-00001-of-00004.safetensors",
212
+ "backbone.qwen_model.model.visual.blocks.18.norm2.bias": "model-00001-of-00004.safetensors",
213
+ "backbone.qwen_model.model.visual.blocks.18.norm2.weight": "model-00001-of-00004.safetensors",
214
+ "backbone.qwen_model.model.visual.blocks.19.attn.proj.bias": "model-00001-of-00004.safetensors",
215
+ "backbone.qwen_model.model.visual.blocks.19.attn.proj.weight": "model-00001-of-00004.safetensors",
216
+ "backbone.qwen_model.model.visual.blocks.19.attn.qkv.bias": "model-00001-of-00004.safetensors",
217
+ "backbone.qwen_model.model.visual.blocks.19.attn.qkv.weight": "model-00001-of-00004.safetensors",
218
+ "backbone.qwen_model.model.visual.blocks.19.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
219
+ "backbone.qwen_model.model.visual.blocks.19.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
220
+ "backbone.qwen_model.model.visual.blocks.19.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
221
+ "backbone.qwen_model.model.visual.blocks.19.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
222
+ "backbone.qwen_model.model.visual.blocks.19.norm1.bias": "model-00001-of-00004.safetensors",
223
+ "backbone.qwen_model.model.visual.blocks.19.norm1.weight": "model-00001-of-00004.safetensors",
224
+ "backbone.qwen_model.model.visual.blocks.19.norm2.bias": "model-00001-of-00004.safetensors",
225
+ "backbone.qwen_model.model.visual.blocks.19.norm2.weight": "model-00001-of-00004.safetensors",
226
+ "backbone.qwen_model.model.visual.blocks.2.attn.proj.bias": "model-00001-of-00004.safetensors",
227
+ "backbone.qwen_model.model.visual.blocks.2.attn.proj.weight": "model-00001-of-00004.safetensors",
228
+ "backbone.qwen_model.model.visual.blocks.2.attn.qkv.bias": "model-00001-of-00004.safetensors",
229
+ "backbone.qwen_model.model.visual.blocks.2.attn.qkv.weight": "model-00001-of-00004.safetensors",
230
+ "backbone.qwen_model.model.visual.blocks.2.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
231
+ "backbone.qwen_model.model.visual.blocks.2.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
232
+ "backbone.qwen_model.model.visual.blocks.2.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
233
+ "backbone.qwen_model.model.visual.blocks.2.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
234
+ "backbone.qwen_model.model.visual.blocks.2.norm1.bias": "model-00001-of-00004.safetensors",
235
+ "backbone.qwen_model.model.visual.blocks.2.norm1.weight": "model-00001-of-00004.safetensors",
236
+ "backbone.qwen_model.model.visual.blocks.2.norm2.bias": "model-00001-of-00004.safetensors",
237
+ "backbone.qwen_model.model.visual.blocks.2.norm2.weight": "model-00001-of-00004.safetensors",
238
+ "backbone.qwen_model.model.visual.blocks.20.attn.proj.bias": "model-00001-of-00004.safetensors",
239
+ "backbone.qwen_model.model.visual.blocks.20.attn.proj.weight": "model-00001-of-00004.safetensors",
240
+ "backbone.qwen_model.model.visual.blocks.20.attn.qkv.bias": "model-00001-of-00004.safetensors",
241
+ "backbone.qwen_model.model.visual.blocks.20.attn.qkv.weight": "model-00001-of-00004.safetensors",
242
+ "backbone.qwen_model.model.visual.blocks.20.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
243
+ "backbone.qwen_model.model.visual.blocks.20.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
244
+ "backbone.qwen_model.model.visual.blocks.20.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
245
+ "backbone.qwen_model.model.visual.blocks.20.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
246
+ "backbone.qwen_model.model.visual.blocks.20.norm1.bias": "model-00001-of-00004.safetensors",
247
+ "backbone.qwen_model.model.visual.blocks.20.norm1.weight": "model-00001-of-00004.safetensors",
248
+ "backbone.qwen_model.model.visual.blocks.20.norm2.bias": "model-00001-of-00004.safetensors",
249
+ "backbone.qwen_model.model.visual.blocks.20.norm2.weight": "model-00001-of-00004.safetensors",
250
+ "backbone.qwen_model.model.visual.blocks.21.attn.proj.bias": "model-00001-of-00004.safetensors",
251
+ "backbone.qwen_model.model.visual.blocks.21.attn.proj.weight": "model-00001-of-00004.safetensors",
252
+ "backbone.qwen_model.model.visual.blocks.21.attn.qkv.bias": "model-00001-of-00004.safetensors",
253
+ "backbone.qwen_model.model.visual.blocks.21.attn.qkv.weight": "model-00001-of-00004.safetensors",
254
+ "backbone.qwen_model.model.visual.blocks.21.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
255
+ "backbone.qwen_model.model.visual.blocks.21.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
256
+ "backbone.qwen_model.model.visual.blocks.21.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
257
+ "backbone.qwen_model.model.visual.blocks.21.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
258
+ "backbone.qwen_model.model.visual.blocks.21.norm1.bias": "model-00001-of-00004.safetensors",
259
+ "backbone.qwen_model.model.visual.blocks.21.norm1.weight": "model-00001-of-00004.safetensors",
260
+ "backbone.qwen_model.model.visual.blocks.21.norm2.bias": "model-00001-of-00004.safetensors",
261
+ "backbone.qwen_model.model.visual.blocks.21.norm2.weight": "model-00001-of-00004.safetensors",
262
+ "backbone.qwen_model.model.visual.blocks.22.attn.proj.bias": "model-00001-of-00004.safetensors",
263
+ "backbone.qwen_model.model.visual.blocks.22.attn.proj.weight": "model-00001-of-00004.safetensors",
264
+ "backbone.qwen_model.model.visual.blocks.22.attn.qkv.bias": "model-00001-of-00004.safetensors",
265
+ "backbone.qwen_model.model.visual.blocks.22.attn.qkv.weight": "model-00001-of-00004.safetensors",
266
+ "backbone.qwen_model.model.visual.blocks.22.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
267
+ "backbone.qwen_model.model.visual.blocks.22.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
268
+ "backbone.qwen_model.model.visual.blocks.22.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
269
+ "backbone.qwen_model.model.visual.blocks.22.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
270
+ "backbone.qwen_model.model.visual.blocks.22.norm1.bias": "model-00001-of-00004.safetensors",
271
+ "backbone.qwen_model.model.visual.blocks.22.norm1.weight": "model-00001-of-00004.safetensors",
272
+ "backbone.qwen_model.model.visual.blocks.22.norm2.bias": "model-00001-of-00004.safetensors",
273
+ "backbone.qwen_model.model.visual.blocks.22.norm2.weight": "model-00001-of-00004.safetensors",
274
+ "backbone.qwen_model.model.visual.blocks.23.attn.proj.bias": "model-00001-of-00004.safetensors",
275
+ "backbone.qwen_model.model.visual.blocks.23.attn.proj.weight": "model-00001-of-00004.safetensors",
276
+ "backbone.qwen_model.model.visual.blocks.23.attn.qkv.bias": "model-00001-of-00004.safetensors",
277
+ "backbone.qwen_model.model.visual.blocks.23.attn.qkv.weight": "model-00001-of-00004.safetensors",
278
+ "backbone.qwen_model.model.visual.blocks.23.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
279
+ "backbone.qwen_model.model.visual.blocks.23.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
280
+ "backbone.qwen_model.model.visual.blocks.23.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
281
+ "backbone.qwen_model.model.visual.blocks.23.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
282
+ "backbone.qwen_model.model.visual.blocks.23.norm1.bias": "model-00001-of-00004.safetensors",
283
+ "backbone.qwen_model.model.visual.blocks.23.norm1.weight": "model-00001-of-00004.safetensors",
284
+ "backbone.qwen_model.model.visual.blocks.23.norm2.bias": "model-00001-of-00004.safetensors",
285
+ "backbone.qwen_model.model.visual.blocks.23.norm2.weight": "model-00001-of-00004.safetensors",
286
+ "backbone.qwen_model.model.visual.blocks.24.attn.proj.bias": "model-00001-of-00004.safetensors",
287
+ "backbone.qwen_model.model.visual.blocks.24.attn.proj.weight": "model-00001-of-00004.safetensors",
288
+ "backbone.qwen_model.model.visual.blocks.24.attn.qkv.bias": "model-00001-of-00004.safetensors",
289
+ "backbone.qwen_model.model.visual.blocks.24.attn.qkv.weight": "model-00001-of-00004.safetensors",
290
+ "backbone.qwen_model.model.visual.blocks.24.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
291
+ "backbone.qwen_model.model.visual.blocks.24.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
292
+ "backbone.qwen_model.model.visual.blocks.24.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
293
+ "backbone.qwen_model.model.visual.blocks.24.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
294
+ "backbone.qwen_model.model.visual.blocks.24.norm1.bias": "model-00001-of-00004.safetensors",
295
+ "backbone.qwen_model.model.visual.blocks.24.norm1.weight": "model-00001-of-00004.safetensors",
296
+ "backbone.qwen_model.model.visual.blocks.24.norm2.bias": "model-00001-of-00004.safetensors",
297
+ "backbone.qwen_model.model.visual.blocks.24.norm2.weight": "model-00001-of-00004.safetensors",
298
+ "backbone.qwen_model.model.visual.blocks.25.attn.proj.bias": "model-00001-of-00004.safetensors",
299
+ "backbone.qwen_model.model.visual.blocks.25.attn.proj.weight": "model-00001-of-00004.safetensors",
300
+ "backbone.qwen_model.model.visual.blocks.25.attn.qkv.bias": "model-00001-of-00004.safetensors",
301
+ "backbone.qwen_model.model.visual.blocks.25.attn.qkv.weight": "model-00001-of-00004.safetensors",
302
+ "backbone.qwen_model.model.visual.blocks.25.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
303
+ "backbone.qwen_model.model.visual.blocks.25.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
304
+ "backbone.qwen_model.model.visual.blocks.25.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
305
+ "backbone.qwen_model.model.visual.blocks.25.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
306
+ "backbone.qwen_model.model.visual.blocks.25.norm1.bias": "model-00001-of-00004.safetensors",
307
+ "backbone.qwen_model.model.visual.blocks.25.norm1.weight": "model-00001-of-00004.safetensors",
308
+ "backbone.qwen_model.model.visual.blocks.25.norm2.bias": "model-00001-of-00004.safetensors",
309
+ "backbone.qwen_model.model.visual.blocks.25.norm2.weight": "model-00001-of-00004.safetensors",
310
+ "backbone.qwen_model.model.visual.blocks.26.attn.proj.bias": "model-00001-of-00004.safetensors",
311
+ "backbone.qwen_model.model.visual.blocks.26.attn.proj.weight": "model-00001-of-00004.safetensors",
312
+ "backbone.qwen_model.model.visual.blocks.26.attn.qkv.bias": "model-00001-of-00004.safetensors",
313
+ "backbone.qwen_model.model.visual.blocks.26.attn.qkv.weight": "model-00001-of-00004.safetensors",
314
+ "backbone.qwen_model.model.visual.blocks.26.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
315
+ "backbone.qwen_model.model.visual.blocks.26.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
316
+ "backbone.qwen_model.model.visual.blocks.26.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
317
+ "backbone.qwen_model.model.visual.blocks.26.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
318
+ "backbone.qwen_model.model.visual.blocks.26.norm1.bias": "model-00001-of-00004.safetensors",
319
+ "backbone.qwen_model.model.visual.blocks.26.norm1.weight": "model-00001-of-00004.safetensors",
320
+ "backbone.qwen_model.model.visual.blocks.26.norm2.bias": "model-00001-of-00004.safetensors",
321
+ "backbone.qwen_model.model.visual.blocks.26.norm2.weight": "model-00001-of-00004.safetensors",
322
+ "backbone.qwen_model.model.visual.blocks.3.attn.proj.bias": "model-00001-of-00004.safetensors",
323
+ "backbone.qwen_model.model.visual.blocks.3.attn.proj.weight": "model-00001-of-00004.safetensors",
324
+ "backbone.qwen_model.model.visual.blocks.3.attn.qkv.bias": "model-00001-of-00004.safetensors",
325
+ "backbone.qwen_model.model.visual.blocks.3.attn.qkv.weight": "model-00001-of-00004.safetensors",
326
+ "backbone.qwen_model.model.visual.blocks.3.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
327
+ "backbone.qwen_model.model.visual.blocks.3.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
328
+ "backbone.qwen_model.model.visual.blocks.3.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
329
+ "backbone.qwen_model.model.visual.blocks.3.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
330
+ "backbone.qwen_model.model.visual.blocks.3.norm1.bias": "model-00001-of-00004.safetensors",
331
+ "backbone.qwen_model.model.visual.blocks.3.norm1.weight": "model-00001-of-00004.safetensors",
332
+ "backbone.qwen_model.model.visual.blocks.3.norm2.bias": "model-00001-of-00004.safetensors",
333
+ "backbone.qwen_model.model.visual.blocks.3.norm2.weight": "model-00001-of-00004.safetensors",
334
+ "backbone.qwen_model.model.visual.blocks.4.attn.proj.bias": "model-00001-of-00004.safetensors",
335
+ "backbone.qwen_model.model.visual.blocks.4.attn.proj.weight": "model-00001-of-00004.safetensors",
336
+ "backbone.qwen_model.model.visual.blocks.4.attn.qkv.bias": "model-00001-of-00004.safetensors",
337
+ "backbone.qwen_model.model.visual.blocks.4.attn.qkv.weight": "model-00001-of-00004.safetensors",
338
+ "backbone.qwen_model.model.visual.blocks.4.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
339
+ "backbone.qwen_model.model.visual.blocks.4.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
340
+ "backbone.qwen_model.model.visual.blocks.4.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
341
+ "backbone.qwen_model.model.visual.blocks.4.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
342
+ "backbone.qwen_model.model.visual.blocks.4.norm1.bias": "model-00001-of-00004.safetensors",
343
+ "backbone.qwen_model.model.visual.blocks.4.norm1.weight": "model-00001-of-00004.safetensors",
344
+ "backbone.qwen_model.model.visual.blocks.4.norm2.bias": "model-00001-of-00004.safetensors",
345
+ "backbone.qwen_model.model.visual.blocks.4.norm2.weight": "model-00001-of-00004.safetensors",
346
+ "backbone.qwen_model.model.visual.blocks.5.attn.proj.bias": "model-00001-of-00004.safetensors",
347
+ "backbone.qwen_model.model.visual.blocks.5.attn.proj.weight": "model-00001-of-00004.safetensors",
348
+ "backbone.qwen_model.model.visual.blocks.5.attn.qkv.bias": "model-00001-of-00004.safetensors",
349
+ "backbone.qwen_model.model.visual.blocks.5.attn.qkv.weight": "model-00001-of-00004.safetensors",
350
+ "backbone.qwen_model.model.visual.blocks.5.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
351
+ "backbone.qwen_model.model.visual.blocks.5.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
352
+ "backbone.qwen_model.model.visual.blocks.5.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
353
+ "backbone.qwen_model.model.visual.blocks.5.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
354
+ "backbone.qwen_model.model.visual.blocks.5.norm1.bias": "model-00001-of-00004.safetensors",
355
+ "backbone.qwen_model.model.visual.blocks.5.norm1.weight": "model-00001-of-00004.safetensors",
356
+ "backbone.qwen_model.model.visual.blocks.5.norm2.bias": "model-00001-of-00004.safetensors",
357
+ "backbone.qwen_model.model.visual.blocks.5.norm2.weight": "model-00001-of-00004.safetensors",
358
+ "backbone.qwen_model.model.visual.blocks.6.attn.proj.bias": "model-00001-of-00004.safetensors",
359
+ "backbone.qwen_model.model.visual.blocks.6.attn.proj.weight": "model-00001-of-00004.safetensors",
360
+ "backbone.qwen_model.model.visual.blocks.6.attn.qkv.bias": "model-00001-of-00004.safetensors",
361
+ "backbone.qwen_model.model.visual.blocks.6.attn.qkv.weight": "model-00001-of-00004.safetensors",
362
+ "backbone.qwen_model.model.visual.blocks.6.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
363
+ "backbone.qwen_model.model.visual.blocks.6.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
364
+ "backbone.qwen_model.model.visual.blocks.6.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
365
+ "backbone.qwen_model.model.visual.blocks.6.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
366
+ "backbone.qwen_model.model.visual.blocks.6.norm1.bias": "model-00001-of-00004.safetensors",
367
+ "backbone.qwen_model.model.visual.blocks.6.norm1.weight": "model-00001-of-00004.safetensors",
368
+ "backbone.qwen_model.model.visual.blocks.6.norm2.bias": "model-00001-of-00004.safetensors",
369
+ "backbone.qwen_model.model.visual.blocks.6.norm2.weight": "model-00001-of-00004.safetensors",
370
+ "backbone.qwen_model.model.visual.blocks.7.attn.proj.bias": "model-00001-of-00004.safetensors",
371
+ "backbone.qwen_model.model.visual.blocks.7.attn.proj.weight": "model-00001-of-00004.safetensors",
372
+ "backbone.qwen_model.model.visual.blocks.7.attn.qkv.bias": "model-00001-of-00004.safetensors",
373
+ "backbone.qwen_model.model.visual.blocks.7.attn.qkv.weight": "model-00001-of-00004.safetensors",
374
+ "backbone.qwen_model.model.visual.blocks.7.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
375
+ "backbone.qwen_model.model.visual.blocks.7.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
376
+ "backbone.qwen_model.model.visual.blocks.7.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
377
+ "backbone.qwen_model.model.visual.blocks.7.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
378
+ "backbone.qwen_model.model.visual.blocks.7.norm1.bias": "model-00001-of-00004.safetensors",
379
+ "backbone.qwen_model.model.visual.blocks.7.norm1.weight": "model-00001-of-00004.safetensors",
380
+ "backbone.qwen_model.model.visual.blocks.7.norm2.bias": "model-00001-of-00004.safetensors",
381
+ "backbone.qwen_model.model.visual.blocks.7.norm2.weight": "model-00001-of-00004.safetensors",
382
+ "backbone.qwen_model.model.visual.blocks.8.attn.proj.bias": "model-00001-of-00004.safetensors",
383
+ "backbone.qwen_model.model.visual.blocks.8.attn.proj.weight": "model-00001-of-00004.safetensors",
384
+ "backbone.qwen_model.model.visual.blocks.8.attn.qkv.bias": "model-00001-of-00004.safetensors",
385
+ "backbone.qwen_model.model.visual.blocks.8.attn.qkv.weight": "model-00001-of-00004.safetensors",
386
+ "backbone.qwen_model.model.visual.blocks.8.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
387
+ "backbone.qwen_model.model.visual.blocks.8.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
388
+ "backbone.qwen_model.model.visual.blocks.8.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
389
+ "backbone.qwen_model.model.visual.blocks.8.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
390
+ "backbone.qwen_model.model.visual.blocks.8.norm1.bias": "model-00001-of-00004.safetensors",
391
+ "backbone.qwen_model.model.visual.blocks.8.norm1.weight": "model-00001-of-00004.safetensors",
392
+ "backbone.qwen_model.model.visual.blocks.8.norm2.bias": "model-00001-of-00004.safetensors",
393
+ "backbone.qwen_model.model.visual.blocks.8.norm2.weight": "model-00001-of-00004.safetensors",
394
+ "backbone.qwen_model.model.visual.blocks.9.attn.proj.bias": "model-00001-of-00004.safetensors",
395
+ "backbone.qwen_model.model.visual.blocks.9.attn.proj.weight": "model-00001-of-00004.safetensors",
396
+ "backbone.qwen_model.model.visual.blocks.9.attn.qkv.bias": "model-00001-of-00004.safetensors",
397
+ "backbone.qwen_model.model.visual.blocks.9.attn.qkv.weight": "model-00001-of-00004.safetensors",
398
+ "backbone.qwen_model.model.visual.blocks.9.mlp.linear_fc1.bias": "model-00001-of-00004.safetensors",
399
+ "backbone.qwen_model.model.visual.blocks.9.mlp.linear_fc1.weight": "model-00001-of-00004.safetensors",
400
+ "backbone.qwen_model.model.visual.blocks.9.mlp.linear_fc2.bias": "model-00001-of-00004.safetensors",
401
+ "backbone.qwen_model.model.visual.blocks.9.mlp.linear_fc2.weight": "model-00001-of-00004.safetensors",
402
+ "backbone.qwen_model.model.visual.blocks.9.norm1.bias": "model-00001-of-00004.safetensors",
403
+ "backbone.qwen_model.model.visual.blocks.9.norm1.weight": "model-00001-of-00004.safetensors",
404
+ "backbone.qwen_model.model.visual.blocks.9.norm2.bias": "model-00001-of-00004.safetensors",
405
+ "backbone.qwen_model.model.visual.blocks.9.norm2.weight": "model-00001-of-00004.safetensors",
406
+ "backbone.qwen_model.model.visual.deepstack_merger_list.0.linear_fc1.bias": "model-00001-of-00004.safetensors",
407
+ "backbone.qwen_model.model.visual.deepstack_merger_list.0.linear_fc1.weight": "model-00001-of-00004.safetensors",
408
+ "backbone.qwen_model.model.visual.deepstack_merger_list.0.linear_fc2.bias": "model-00001-of-00004.safetensors",
409
+ "backbone.qwen_model.model.visual.deepstack_merger_list.0.linear_fc2.weight": "model-00001-of-00004.safetensors",
410
+ "backbone.qwen_model.model.visual.deepstack_merger_list.0.norm.bias": "model-00001-of-00004.safetensors",
411
+ "backbone.qwen_model.model.visual.deepstack_merger_list.0.norm.weight": "model-00001-of-00004.safetensors",
412
+ "backbone.qwen_model.model.visual.deepstack_merger_list.1.linear_fc1.bias": "model-00001-of-00004.safetensors",
413
+ "backbone.qwen_model.model.visual.deepstack_merger_list.1.linear_fc1.weight": "model-00001-of-00004.safetensors",
414
+ "backbone.qwen_model.model.visual.deepstack_merger_list.1.linear_fc2.bias": "model-00001-of-00004.safetensors",
415
+ "backbone.qwen_model.model.visual.deepstack_merger_list.1.linear_fc2.weight": "model-00001-of-00004.safetensors",
416
+ "backbone.qwen_model.model.visual.deepstack_merger_list.1.norm.bias": "model-00001-of-00004.safetensors",
417
+ "backbone.qwen_model.model.visual.deepstack_merger_list.1.norm.weight": "model-00001-of-00004.safetensors",
418
+ "backbone.qwen_model.model.visual.deepstack_merger_list.2.linear_fc1.bias": "model-00001-of-00004.safetensors",
419
+ "backbone.qwen_model.model.visual.deepstack_merger_list.2.linear_fc1.weight": "model-00001-of-00004.safetensors",
420
+ "backbone.qwen_model.model.visual.deepstack_merger_list.2.linear_fc2.bias": "model-00001-of-00004.safetensors",
421
+ "backbone.qwen_model.model.visual.deepstack_merger_list.2.linear_fc2.weight": "model-00001-of-00004.safetensors",
422
+ "backbone.qwen_model.model.visual.deepstack_merger_list.2.norm.bias": "model-00001-of-00004.safetensors",
423
+ "backbone.qwen_model.model.visual.deepstack_merger_list.2.norm.weight": "model-00001-of-00004.safetensors",
424
+ "backbone.qwen_model.model.visual.merger.linear_fc1.bias": "model-00001-of-00004.safetensors",
425
+ "backbone.qwen_model.model.visual.merger.linear_fc1.weight": "model-00001-of-00004.safetensors",
426
+ "backbone.qwen_model.model.visual.merger.linear_fc2.bias": "model-00001-of-00004.safetensors",
427
+ "backbone.qwen_model.model.visual.merger.linear_fc2.weight": "model-00001-of-00004.safetensors",
428
+ "backbone.qwen_model.model.visual.merger.norm.bias": "model-00001-of-00004.safetensors",
429
+ "backbone.qwen_model.model.visual.merger.norm.weight": "model-00001-of-00004.safetensors",
430
+ "backbone.qwen_model.model.visual.motion_block.out_proj.bias": "model-00001-of-00004.safetensors",
431
+ "backbone.qwen_model.model.visual.motion_block.out_proj.weight": "model-00001-of-00004.safetensors",
432
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.in_proj.bias": "model-00001-of-00004.safetensors",
433
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.in_proj.weight": "model-00001-of-00004.safetensors",
434
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.ln_pre.bias": "model-00001-of-00004.safetensors",
435
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.ln_pre.weight": "model-00001-of-00004.safetensors",
436
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.out_proj.bias": "model-00001-of-00004.safetensors",
437
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.out_proj.weight": "model-00001-of-00004.safetensors",
438
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.stss_extraction.conv0.0.weight": "model-00001-of-00004.safetensors",
439
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.stss_extraction.conv0.1.bias": "model-00001-of-00004.safetensors",
440
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.stss_extraction.conv0.1.num_batches_tracked": "model-00001-of-00004.safetensors",
441
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.stss_extraction.conv0.1.running_mean": "model-00001-of-00004.safetensors",
442
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.stss_extraction.conv0.1.running_var": "model-00001-of-00004.safetensors",
443
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.stss_extraction.conv0.1.weight": "model-00001-of-00004.safetensors",
444
+ "backbone.qwen_model.model.visual.motion_block.stss_encoders.0.stss_integration.fuse.1.weight": "model-00001-of-00004.safetensors",
445
+ "backbone.qwen_model.model.visual.patch_embed.proj.bias": "model-00001-of-00004.safetensors",
446
+ "backbone.qwen_model.model.visual.patch_embed.proj.weight": "model-00001-of-00004.safetensors",
447
+ "backbone.qwen_model.model.visual.pos_embed.weight": "model-00001-of-00004.safetensors",
448
+ "backbone.qwen_model.model.language_model.layers.10.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
449
+ "backbone.qwen_model.model.language_model.layers.10.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
450
+ "backbone.qwen_model.model.language_model.layers.10.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
451
+ "backbone.qwen_model.model.language_model.layers.10.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
452
+ "backbone.qwen_model.model.language_model.layers.10.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
453
+ "backbone.qwen_model.model.language_model.layers.10.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
454
+ "backbone.qwen_model.model.language_model.layers.10.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
455
+ "backbone.qwen_model.model.language_model.layers.10.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
456
+ "backbone.qwen_model.model.language_model.layers.10.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
457
+ "backbone.qwen_model.model.language_model.layers.10.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
458
+ "backbone.qwen_model.model.language_model.layers.10.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
459
+ "backbone.qwen_model.model.language_model.layers.11.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
460
+ "backbone.qwen_model.model.language_model.layers.11.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
461
+ "backbone.qwen_model.model.language_model.layers.11.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
462
+ "backbone.qwen_model.model.language_model.layers.11.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
463
+ "backbone.qwen_model.model.language_model.layers.11.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
464
+ "backbone.qwen_model.model.language_model.layers.11.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
465
+ "backbone.qwen_model.model.language_model.layers.11.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
466
+ "backbone.qwen_model.model.language_model.layers.11.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
467
+ "backbone.qwen_model.model.language_model.layers.11.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
468
+ "backbone.qwen_model.model.language_model.layers.11.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
469
+ "backbone.qwen_model.model.language_model.layers.11.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
470
+ "backbone.qwen_model.model.language_model.layers.12.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
471
+ "backbone.qwen_model.model.language_model.layers.12.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
472
+ "backbone.qwen_model.model.language_model.layers.12.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
473
+ "backbone.qwen_model.model.language_model.layers.12.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
474
+ "backbone.qwen_model.model.language_model.layers.12.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
475
+ "backbone.qwen_model.model.language_model.layers.12.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
476
+ "backbone.qwen_model.model.language_model.layers.12.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
477
+ "backbone.qwen_model.model.language_model.layers.12.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
478
+ "backbone.qwen_model.model.language_model.layers.12.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
479
+ "backbone.qwen_model.model.language_model.layers.12.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
480
+ "backbone.qwen_model.model.language_model.layers.12.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
481
+ "backbone.qwen_model.model.language_model.layers.13.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
482
+ "backbone.qwen_model.model.language_model.layers.13.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
483
+ "backbone.qwen_model.model.language_model.layers.13.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
484
+ "backbone.qwen_model.model.language_model.layers.13.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
485
+ "backbone.qwen_model.model.language_model.layers.13.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
486
+ "backbone.qwen_model.model.language_model.layers.13.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
487
+ "backbone.qwen_model.model.language_model.layers.13.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
488
+ "backbone.qwen_model.model.language_model.layers.13.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
489
+ "backbone.qwen_model.model.language_model.layers.13.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
490
+ "backbone.qwen_model.model.language_model.layers.13.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
491
+ "backbone.qwen_model.model.language_model.layers.13.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
492
+ "backbone.qwen_model.model.language_model.layers.14.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
493
+ "backbone.qwen_model.model.language_model.layers.14.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
494
+ "backbone.qwen_model.model.language_model.layers.14.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
495
+ "backbone.qwen_model.model.language_model.layers.14.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
496
+ "backbone.qwen_model.model.language_model.layers.14.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
497
+ "backbone.qwen_model.model.language_model.layers.14.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
498
+ "backbone.qwen_model.model.language_model.layers.14.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
499
+ "backbone.qwen_model.model.language_model.layers.14.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
500
+ "backbone.qwen_model.model.language_model.layers.14.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
501
+ "backbone.qwen_model.model.language_model.layers.14.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
502
+ "backbone.qwen_model.model.language_model.layers.14.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
503
+ "backbone.qwen_model.model.language_model.layers.15.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
504
+ "backbone.qwen_model.model.language_model.layers.15.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
505
+ "backbone.qwen_model.model.language_model.layers.15.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
506
+ "backbone.qwen_model.model.language_model.layers.15.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
507
+ "backbone.qwen_model.model.language_model.layers.15.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
508
+ "backbone.qwen_model.model.language_model.layers.15.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
509
+ "backbone.qwen_model.model.language_model.layers.15.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
510
+ "backbone.qwen_model.model.language_model.layers.15.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
511
+ "backbone.qwen_model.model.language_model.layers.15.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
512
+ "backbone.qwen_model.model.language_model.layers.15.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
513
+ "backbone.qwen_model.model.language_model.layers.15.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
514
+ "backbone.qwen_model.model.language_model.layers.16.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
515
+ "backbone.qwen_model.model.language_model.layers.16.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
516
+ "backbone.qwen_model.model.language_model.layers.16.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
517
+ "backbone.qwen_model.model.language_model.layers.16.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
518
+ "backbone.qwen_model.model.language_model.layers.16.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
519
+ "backbone.qwen_model.model.language_model.layers.16.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
520
+ "backbone.qwen_model.model.language_model.layers.16.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
521
+ "backbone.qwen_model.model.language_model.layers.16.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
522
+ "backbone.qwen_model.model.language_model.layers.16.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
523
+ "backbone.qwen_model.model.language_model.layers.16.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
524
+ "backbone.qwen_model.model.language_model.layers.16.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
525
+ "backbone.qwen_model.model.language_model.layers.17.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
526
+ "backbone.qwen_model.model.language_model.layers.17.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
527
+ "backbone.qwen_model.model.language_model.layers.17.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
528
+ "backbone.qwen_model.model.language_model.layers.17.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
529
+ "backbone.qwen_model.model.language_model.layers.17.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
530
+ "backbone.qwen_model.model.language_model.layers.17.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
531
+ "backbone.qwen_model.model.language_model.layers.17.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
532
+ "backbone.qwen_model.model.language_model.layers.17.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
533
+ "backbone.qwen_model.model.language_model.layers.17.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
534
+ "backbone.qwen_model.model.language_model.layers.17.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
535
+ "backbone.qwen_model.model.language_model.layers.17.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
536
+ "backbone.qwen_model.model.language_model.layers.6.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
537
+ "backbone.qwen_model.model.language_model.layers.6.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
538
+ "backbone.qwen_model.model.language_model.layers.6.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
539
+ "backbone.qwen_model.model.language_model.layers.6.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
540
+ "backbone.qwen_model.model.language_model.layers.7.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
541
+ "backbone.qwen_model.model.language_model.layers.7.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
542
+ "backbone.qwen_model.model.language_model.layers.7.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
543
+ "backbone.qwen_model.model.language_model.layers.7.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
544
+ "backbone.qwen_model.model.language_model.layers.7.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
545
+ "backbone.qwen_model.model.language_model.layers.7.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
546
+ "backbone.qwen_model.model.language_model.layers.7.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
547
+ "backbone.qwen_model.model.language_model.layers.7.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
548
+ "backbone.qwen_model.model.language_model.layers.7.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
549
+ "backbone.qwen_model.model.language_model.layers.7.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
550
+ "backbone.qwen_model.model.language_model.layers.7.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
551
+ "backbone.qwen_model.model.language_model.layers.8.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
552
+ "backbone.qwen_model.model.language_model.layers.8.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
553
+ "backbone.qwen_model.model.language_model.layers.8.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
554
+ "backbone.qwen_model.model.language_model.layers.8.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
555
+ "backbone.qwen_model.model.language_model.layers.8.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
556
+ "backbone.qwen_model.model.language_model.layers.8.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
557
+ "backbone.qwen_model.model.language_model.layers.8.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
558
+ "backbone.qwen_model.model.language_model.layers.8.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
559
+ "backbone.qwen_model.model.language_model.layers.8.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
560
+ "backbone.qwen_model.model.language_model.layers.8.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
561
+ "backbone.qwen_model.model.language_model.layers.8.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
562
+ "backbone.qwen_model.model.language_model.layers.9.layer.input_layernorm.weight": "model-00002-of-00004.safetensors",
563
+ "backbone.qwen_model.model.language_model.layers.9.layer.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
564
+ "backbone.qwen_model.model.language_model.layers.9.layer.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
565
+ "backbone.qwen_model.model.language_model.layers.9.layer.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
566
+ "backbone.qwen_model.model.language_model.layers.9.layer.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
567
+ "backbone.qwen_model.model.language_model.layers.9.layer.self_attn.k_norm.weight": "model-00002-of-00004.safetensors",
568
+ "backbone.qwen_model.model.language_model.layers.9.layer.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
569
+ "backbone.qwen_model.model.language_model.layers.9.layer.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
570
+ "backbone.qwen_model.model.language_model.layers.9.layer.self_attn.q_norm.weight": "model-00002-of-00004.safetensors",
571
+ "backbone.qwen_model.model.language_model.layers.9.layer.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
572
+ "backbone.qwen_model.model.language_model.layers.9.layer.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
573
+ "backbone.qwen_model.model.language_model.norm.weight": "model-00002-of-00004.safetensors",
574
+ "action_model.mask_token": "model-00003-of-00004.safetensors",
575
+ "action_model.model.double_blocks.0.k_norm_p.weight": "model-00003-of-00004.safetensors",
576
+ "action_model.model.double_blocks.0.k_norm_sa.weight": "model-00003-of-00004.safetensors",
577
+ "action_model.model.double_blocks.0.k_norm_vl.weight": "model-00003-of-00004.safetensors",
578
+ "action_model.model.double_blocks.0.p_mlp.w12.bias": "model-00003-of-00004.safetensors",
579
+ "action_model.model.double_blocks.0.p_mlp.w12.weight": "model-00003-of-00004.safetensors",
580
+ "action_model.model.double_blocks.0.p_mlp.w3.bias": "model-00003-of-00004.safetensors",
581
+ "action_model.model.double_blocks.0.p_mlp.w3.weight": "model-00003-of-00004.safetensors",
582
+ "action_model.model.double_blocks.0.p_proj.bias": "model-00003-of-00004.safetensors",
583
+ "action_model.model.double_blocks.0.p_proj.weight": "model-00003-of-00004.safetensors",
584
+ "action_model.model.double_blocks.0.p_qkv.bias": "model-00003-of-00004.safetensors",
585
+ "action_model.model.double_blocks.0.p_qkv.weight": "model-00003-of-00004.safetensors",
586
+ "action_model.model.double_blocks.0.q_norm_p.weight": "model-00003-of-00004.safetensors",
587
+ "action_model.model.double_blocks.0.q_norm_sa.weight": "model-00003-of-00004.safetensors",
588
+ "action_model.model.double_blocks.0.q_norm_vl.weight": "model-00003-of-00004.safetensors",
589
+ "action_model.model.double_blocks.0.sa_mlp.w12.bias": "model-00003-of-00004.safetensors",
590
+ "action_model.model.double_blocks.0.sa_mlp.w12.weight": "model-00003-of-00004.safetensors",
591
+ "action_model.model.double_blocks.0.sa_mlp.w3.bias": "model-00003-of-00004.safetensors",
592
+ "action_model.model.double_blocks.0.sa_mlp.w3.weight": "model-00003-of-00004.safetensors",
593
+ "action_model.model.double_blocks.0.sa_proj.bias": "model-00003-of-00004.safetensors",
594
+ "action_model.model.double_blocks.0.sa_proj.weight": "model-00003-of-00004.safetensors",
595
+ "action_model.model.double_blocks.0.sa_qkv.bias": "model-00003-of-00004.safetensors",
596
+ "action_model.model.double_blocks.0.sa_qkv.weight": "model-00003-of-00004.safetensors",
597
+ "action_model.model.double_blocks.0.vl_mlp.w12.bias": "model-00003-of-00004.safetensors",
598
+ "action_model.model.double_blocks.0.vl_mlp.w12.weight": "model-00003-of-00004.safetensors",
599
+ "action_model.model.double_blocks.0.vl_mlp.w3.bias": "model-00003-of-00004.safetensors",
600
+ "action_model.model.double_blocks.0.vl_mlp.w3.weight": "model-00003-of-00004.safetensors",
601
+ "action_model.model.double_blocks.0.vl_proj.bias": "model-00003-of-00004.safetensors",
602
+ "action_model.model.double_blocks.0.vl_proj.weight": "model-00003-of-00004.safetensors",
603
+ "action_model.model.double_blocks.0.vl_qkv.bias": "model-00003-of-00004.safetensors",
604
+ "action_model.model.double_blocks.0.vl_qkv.weight": "model-00003-of-00004.safetensors",
605
+ "action_model.model.double_blocks.1.k_norm_p.weight": "model-00003-of-00004.safetensors",
606
+ "action_model.model.double_blocks.1.k_norm_sa.weight": "model-00003-of-00004.safetensors",
607
+ "action_model.model.double_blocks.1.k_norm_vl.weight": "model-00003-of-00004.safetensors",
608
+ "action_model.model.double_blocks.1.p_mlp.w12.bias": "model-00003-of-00004.safetensors",
609
+ "action_model.model.double_blocks.1.p_mlp.w12.weight": "model-00003-of-00004.safetensors",
610
+ "action_model.model.double_blocks.1.p_mlp.w3.bias": "model-00003-of-00004.safetensors",
611
+ "action_model.model.double_blocks.1.p_mlp.w3.weight": "model-00003-of-00004.safetensors",
612
+ "action_model.model.double_blocks.1.p_proj.bias": "model-00003-of-00004.safetensors",
613
+ "action_model.model.double_blocks.1.p_proj.weight": "model-00003-of-00004.safetensors",
614
+ "action_model.model.double_blocks.1.p_qkv.bias": "model-00003-of-00004.safetensors",
615
+ "action_model.model.double_blocks.1.p_qkv.weight": "model-00003-of-00004.safetensors",
616
+ "action_model.model.double_blocks.1.q_norm_p.weight": "model-00003-of-00004.safetensors",
617
+ "action_model.model.double_blocks.1.q_norm_sa.weight": "model-00003-of-00004.safetensors",
618
+ "action_model.model.double_blocks.1.q_norm_vl.weight": "model-00003-of-00004.safetensors",
619
+ "action_model.model.double_blocks.1.sa_mlp.w12.bias": "model-00003-of-00004.safetensors",
620
+ "action_model.model.double_blocks.1.sa_mlp.w12.weight": "model-00003-of-00004.safetensors",
621
+ "action_model.model.double_blocks.1.sa_mlp.w3.bias": "model-00003-of-00004.safetensors",
622
+ "action_model.model.double_blocks.1.sa_mlp.w3.weight": "model-00003-of-00004.safetensors",
623
+ "action_model.model.double_blocks.1.sa_proj.bias": "model-00003-of-00004.safetensors",
624
+ "action_model.model.double_blocks.1.sa_proj.weight": "model-00003-of-00004.safetensors",
625
+ "action_model.model.double_blocks.1.sa_qkv.bias": "model-00003-of-00004.safetensors",
626
+ "action_model.model.double_blocks.1.sa_qkv.weight": "model-00003-of-00004.safetensors",
627
+ "action_model.model.double_blocks.1.vl_mlp.w12.bias": "model-00003-of-00004.safetensors",
628
+ "action_model.model.double_blocks.1.vl_mlp.w12.weight": "model-00003-of-00004.safetensors",
629
+ "action_model.model.double_blocks.1.vl_mlp.w3.bias": "model-00003-of-00004.safetensors",
630
+ "action_model.model.double_blocks.1.vl_mlp.w3.weight": "model-00003-of-00004.safetensors",
631
+ "action_model.model.double_blocks.1.vl_proj.bias": "model-00003-of-00004.safetensors",
632
+ "action_model.model.double_blocks.1.vl_proj.weight": "model-00003-of-00004.safetensors",
633
+ "action_model.model.double_blocks.1.vl_qkv.bias": "model-00003-of-00004.safetensors",
634
+ "action_model.model.double_blocks.1.vl_qkv.weight": "model-00003-of-00004.safetensors",
635
+ "action_model.model.double_blocks.2.k_norm_p.weight": "model-00003-of-00004.safetensors",
636
+ "action_model.model.double_blocks.2.k_norm_sa.weight": "model-00003-of-00004.safetensors",
637
+ "action_model.model.double_blocks.2.k_norm_vl.weight": "model-00003-of-00004.safetensors",
638
+ "action_model.model.double_blocks.2.p_mlp.w12.bias": "model-00003-of-00004.safetensors",
639
+ "action_model.model.double_blocks.2.p_mlp.w12.weight": "model-00003-of-00004.safetensors",
640
+ "action_model.model.double_blocks.2.p_mlp.w3.bias": "model-00003-of-00004.safetensors",
641
+ "action_model.model.double_blocks.2.p_mlp.w3.weight": "model-00003-of-00004.safetensors",
642
+ "action_model.model.double_blocks.2.p_proj.bias": "model-00003-of-00004.safetensors",
643
+ "action_model.model.double_blocks.2.p_proj.weight": "model-00003-of-00004.safetensors",
644
+ "action_model.model.double_blocks.2.p_qkv.bias": "model-00003-of-00004.safetensors",
645
+ "action_model.model.double_blocks.2.p_qkv.weight": "model-00003-of-00004.safetensors",
646
+ "action_model.model.double_blocks.2.q_norm_p.weight": "model-00003-of-00004.safetensors",
647
+ "action_model.model.double_blocks.2.q_norm_sa.weight": "model-00003-of-00004.safetensors",
648
+ "action_model.model.double_blocks.2.q_norm_vl.weight": "model-00003-of-00004.safetensors",
649
+ "action_model.model.double_blocks.2.sa_mlp.w12.bias": "model-00003-of-00004.safetensors",
650
+ "action_model.model.double_blocks.2.sa_mlp.w12.weight": "model-00003-of-00004.safetensors",
651
+ "action_model.model.double_blocks.2.sa_mlp.w3.bias": "model-00003-of-00004.safetensors",
652
+ "action_model.model.double_blocks.2.sa_mlp.w3.weight": "model-00003-of-00004.safetensors",
653
+ "action_model.model.double_blocks.2.sa_proj.bias": "model-00003-of-00004.safetensors",
654
+ "action_model.model.double_blocks.2.sa_proj.weight": "model-00003-of-00004.safetensors",
655
+ "action_model.model.double_blocks.2.sa_qkv.bias": "model-00003-of-00004.safetensors",
656
+ "action_model.model.double_blocks.2.sa_qkv.weight": "model-00003-of-00004.safetensors",
657
+ "action_model.model.double_blocks.2.vl_mlp.w12.bias": "model-00003-of-00004.safetensors",
658
+ "action_model.model.double_blocks.2.vl_mlp.w12.weight": "model-00003-of-00004.safetensors",
659
+ "action_model.model.double_blocks.2.vl_mlp.w3.bias": "model-00003-of-00004.safetensors",
660
+ "action_model.model.double_blocks.2.vl_mlp.w3.weight": "model-00003-of-00004.safetensors",
661
+ "action_model.model.double_blocks.2.vl_proj.bias": "model-00003-of-00004.safetensors",
662
+ "action_model.model.double_blocks.2.vl_proj.weight": "model-00003-of-00004.safetensors",
663
+ "action_model.model.double_blocks.2.vl_qkv.bias": "model-00003-of-00004.safetensors",
664
+ "action_model.model.double_blocks.2.vl_qkv.weight": "model-00003-of-00004.safetensors",
665
+ "action_model.model.double_blocks.3.k_norm_p.weight": "model-00003-of-00004.safetensors",
666
+ "action_model.model.double_blocks.3.k_norm_sa.weight": "model-00003-of-00004.safetensors",
667
+ "action_model.model.double_blocks.3.k_norm_vl.weight": "model-00003-of-00004.safetensors",
668
+ "action_model.model.double_blocks.3.p_mlp.w12.bias": "model-00003-of-00004.safetensors",
669
+ "action_model.model.double_blocks.3.p_mlp.w12.weight": "model-00003-of-00004.safetensors",
670
+ "action_model.model.double_blocks.3.p_mlp.w3.bias": "model-00003-of-00004.safetensors",
671
+ "action_model.model.double_blocks.3.p_mlp.w3.weight": "model-00003-of-00004.safetensors",
672
+ "action_model.model.double_blocks.3.p_proj.bias": "model-00003-of-00004.safetensors",
673
+ "action_model.model.double_blocks.3.p_proj.weight": "model-00003-of-00004.safetensors",
674
+ "action_model.model.double_blocks.3.p_qkv.bias": "model-00003-of-00004.safetensors",
675
+ "action_model.model.double_blocks.3.p_qkv.weight": "model-00003-of-00004.safetensors",
676
+ "action_model.model.double_blocks.3.q_norm_p.weight": "model-00003-of-00004.safetensors",
677
+ "action_model.model.double_blocks.3.q_norm_sa.weight": "model-00003-of-00004.safetensors",
678
+ "action_model.model.double_blocks.3.q_norm_vl.weight": "model-00003-of-00004.safetensors",
679
+ "action_model.model.double_blocks.3.sa_mlp.w12.bias": "model-00003-of-00004.safetensors",
680
+ "action_model.model.double_blocks.3.sa_mlp.w12.weight": "model-00003-of-00004.safetensors",
681
+ "action_model.model.double_blocks.3.sa_mlp.w3.bias": "model-00003-of-00004.safetensors",
682
+ "action_model.model.double_blocks.3.sa_mlp.w3.weight": "model-00003-of-00004.safetensors",
683
+ "action_model.model.double_blocks.3.sa_proj.bias": "model-00003-of-00004.safetensors",
684
+ "action_model.model.double_blocks.3.sa_proj.weight": "model-00003-of-00004.safetensors",
685
+ "action_model.model.double_blocks.3.sa_qkv.bias": "model-00003-of-00004.safetensors",
686
+ "action_model.model.double_blocks.3.sa_qkv.weight": "model-00003-of-00004.safetensors",
687
+ "action_model.model.double_blocks.3.vl_mlp.w12.bias": "model-00003-of-00004.safetensors",
688
+ "action_model.model.double_blocks.3.vl_mlp.w12.weight": "model-00003-of-00004.safetensors",
689
+ "action_model.model.double_blocks.3.vl_mlp.w3.bias": "model-00003-of-00004.safetensors",
690
+ "action_model.model.double_blocks.3.vl_mlp.w3.weight": "model-00003-of-00004.safetensors",
691
+ "action_model.model.double_blocks.3.vl_proj.bias": "model-00003-of-00004.safetensors",
692
+ "action_model.model.double_blocks.3.vl_proj.weight": "model-00003-of-00004.safetensors",
693
+ "action_model.model.double_blocks.3.vl_qkv.bias": "model-00003-of-00004.safetensors",
694
+ "action_model.model.double_blocks.3.vl_qkv.weight": "model-00003-of-00004.safetensors",
695
+ "action_model.model.proj_out_1.bias": "model-00003-of-00004.safetensors",
696
+ "action_model.model.proj_out_1.weight": "model-00003-of-00004.safetensors",
697
+ "action_model.model.proj_out_2.bias": "model-00003-of-00004.safetensors",
698
+ "action_model.model.proj_out_2.weight": "model-00003-of-00004.safetensors",
699
+ "action_model.model.proj_out_physics_1.bias": "model-00003-of-00004.safetensors",
700
+ "action_model.model.proj_out_physics_1.weight": "model-00003-of-00004.safetensors",
701
+ "action_model.model.proj_out_physics_2.bias": "model-00003-of-00004.safetensors",
702
+ "action_model.model.proj_out_physics_2.weight": "model-00003-of-00004.safetensors",
703
+ "action_model.model.single_blocks.0.k_norm.weight": "model-00003-of-00004.safetensors",
704
+ "action_model.model.single_blocks.0.linear1.bias": "model-00003-of-00004.safetensors",
705
+ "action_model.model.single_blocks.0.linear1.weight": "model-00003-of-00004.safetensors",
706
+ "action_model.model.single_blocks.0.linear2.bias": "model-00003-of-00004.safetensors",
707
+ "action_model.model.single_blocks.0.linear2.weight": "model-00003-of-00004.safetensors",
708
+ "action_model.model.single_blocks.0.mlp_proj.bias": "model-00003-of-00004.safetensors",
709
+ "action_model.model.single_blocks.0.mlp_proj.weight": "model-00003-of-00004.safetensors",
710
+ "action_model.model.single_blocks.0.p_k_norm.weight": "model-00003-of-00004.safetensors",
711
+ "action_model.model.single_blocks.0.p_linear1.bias": "model-00003-of-00004.safetensors",
712
+ "action_model.model.single_blocks.0.p_linear1.weight": "model-00003-of-00004.safetensors",
713
+ "action_model.model.single_blocks.0.p_linear2.bias": "model-00003-of-00004.safetensors",
714
+ "action_model.model.single_blocks.0.p_linear2.weight": "model-00003-of-00004.safetensors",
715
+ "action_model.model.single_blocks.0.p_mlp_proj.bias": "model-00003-of-00004.safetensors",
716
+ "action_model.model.single_blocks.0.p_mlp_proj.weight": "model-00003-of-00004.safetensors",
717
+ "action_model.model.single_blocks.0.p_q_norm.weight": "model-00003-of-00004.safetensors",
718
+ "action_model.model.single_blocks.0.q_norm.weight": "model-00003-of-00004.safetensors",
719
+ "action_model.model.single_blocks.1.k_norm.weight": "model-00003-of-00004.safetensors",
720
+ "action_model.model.single_blocks.1.linear1.bias": "model-00003-of-00004.safetensors",
721
+ "action_model.model.single_blocks.1.linear1.weight": "model-00003-of-00004.safetensors",
722
+ "action_model.model.single_blocks.1.linear2.bias": "model-00003-of-00004.safetensors",
723
+ "action_model.model.single_blocks.1.linear2.weight": "model-00003-of-00004.safetensors",
724
+ "action_model.model.single_blocks.1.mlp_proj.bias": "model-00003-of-00004.safetensors",
725
+ "action_model.model.single_blocks.1.mlp_proj.weight": "model-00003-of-00004.safetensors",
726
+ "action_model.model.single_blocks.1.p_k_norm.weight": "model-00003-of-00004.safetensors",
727
+ "action_model.model.single_blocks.1.p_linear1.bias": "model-00003-of-00004.safetensors",
728
+ "action_model.model.single_blocks.1.p_linear1.weight": "model-00003-of-00004.safetensors",
729
+ "action_model.model.single_blocks.1.p_linear2.bias": "model-00003-of-00004.safetensors",
730
+ "action_model.model.single_blocks.1.p_linear2.weight": "model-00003-of-00004.safetensors",
731
+ "action_model.model.single_blocks.1.p_mlp_proj.bias": "model-00003-of-00004.safetensors",
732
+ "action_model.model.single_blocks.1.p_mlp_proj.weight": "model-00003-of-00004.safetensors",
733
+ "action_model.model.single_blocks.1.p_q_norm.weight": "model-00003-of-00004.safetensors",
734
+ "action_model.model.single_blocks.1.q_norm.weight": "model-00003-of-00004.safetensors",
735
+ "action_model.model.single_blocks.2.k_norm.weight": "model-00003-of-00004.safetensors",
736
+ "action_model.model.single_blocks.2.linear1.bias": "model-00003-of-00004.safetensors",
737
+ "action_model.model.single_blocks.2.linear1.weight": "model-00003-of-00004.safetensors",
738
+ "action_model.model.single_blocks.2.linear2.bias": "model-00003-of-00004.safetensors",
739
+ "action_model.model.single_blocks.2.linear2.weight": "model-00003-of-00004.safetensors",
740
+ "action_model.model.single_blocks.2.mlp_proj.bias": "model-00003-of-00004.safetensors",
741
+ "action_model.model.single_blocks.2.mlp_proj.weight": "model-00003-of-00004.safetensors",
742
+ "action_model.model.single_blocks.2.p_k_norm.weight": "model-00003-of-00004.safetensors",
743
+ "action_model.model.single_blocks.2.p_linear1.bias": "model-00003-of-00004.safetensors",
744
+ "action_model.model.single_blocks.2.p_linear1.weight": "model-00003-of-00004.safetensors",
745
+ "action_model.model.single_blocks.2.p_linear2.bias": "model-00003-of-00004.safetensors",
746
+ "action_model.model.single_blocks.2.p_linear2.weight": "model-00003-of-00004.safetensors",
747
+ "action_model.model.single_blocks.2.p_mlp_proj.bias": "model-00003-of-00004.safetensors",
748
+ "action_model.model.single_blocks.2.p_mlp_proj.weight": "model-00003-of-00004.safetensors",
749
+ "action_model.model.single_blocks.2.p_q_norm.weight": "model-00003-of-00004.safetensors",
750
+ "action_model.model.single_blocks.2.q_norm.weight": "model-00003-of-00004.safetensors",
751
+ "action_model.model.single_blocks.3.k_norm.weight": "model-00003-of-00004.safetensors",
752
+ "action_model.model.single_blocks.3.linear1.bias": "model-00003-of-00004.safetensors",
753
+ "action_model.model.single_blocks.3.linear1.weight": "model-00003-of-00004.safetensors",
754
+ "action_model.model.single_blocks.3.linear2.bias": "model-00003-of-00004.safetensors",
755
+ "action_model.model.single_blocks.3.linear2.weight": "model-00003-of-00004.safetensors",
756
+ "action_model.model.single_blocks.3.mlp_proj.bias": "model-00003-of-00004.safetensors",
757
+ "action_model.model.single_blocks.3.mlp_proj.weight": "model-00003-of-00004.safetensors",
758
+ "action_model.model.single_blocks.3.p_k_norm.weight": "model-00003-of-00004.safetensors",
759
+ "action_model.model.single_blocks.3.p_linear1.bias": "model-00003-of-00004.safetensors",
760
+ "action_model.model.single_blocks.3.p_linear1.weight": "model-00003-of-00004.safetensors",
761
+ "action_model.model.single_blocks.3.p_linear2.bias": "model-00003-of-00004.safetensors",
762
+ "action_model.model.single_blocks.3.p_linear2.weight": "model-00003-of-00004.safetensors",
763
+ "action_model.model.single_blocks.3.p_mlp_proj.bias": "model-00003-of-00004.safetensors",
764
+ "action_model.model.single_blocks.3.p_mlp_proj.weight": "model-00003-of-00004.safetensors",
765
+ "action_model.model.single_blocks.3.p_q_norm.weight": "model-00003-of-00004.safetensors",
766
+ "action_model.model.single_blocks.3.q_norm.weight": "model-00003-of-00004.safetensors",
767
+ "action_model.model.single_blocks.4.k_norm.weight": "model-00003-of-00004.safetensors",
768
+ "action_model.model.single_blocks.4.linear1.bias": "model-00003-of-00004.safetensors",
769
+ "action_model.model.single_blocks.4.linear1.weight": "model-00003-of-00004.safetensors",
770
+ "action_model.model.single_blocks.4.linear2.bias": "model-00003-of-00004.safetensors",
771
+ "action_model.model.single_blocks.4.linear2.weight": "model-00003-of-00004.safetensors",
772
+ "action_model.model.single_blocks.4.mlp_proj.bias": "model-00003-of-00004.safetensors",
773
+ "action_model.model.single_blocks.4.mlp_proj.weight": "model-00003-of-00004.safetensors",
774
+ "action_model.model.single_blocks.4.p_k_norm.weight": "model-00003-of-00004.safetensors",
775
+ "action_model.model.single_blocks.4.p_linear1.bias": "model-00003-of-00004.safetensors",
776
+ "action_model.model.single_blocks.4.p_linear1.weight": "model-00003-of-00004.safetensors",
777
+ "action_model.model.single_blocks.4.p_linear2.bias": "model-00003-of-00004.safetensors",
778
+ "action_model.model.single_blocks.4.p_linear2.weight": "model-00003-of-00004.safetensors",
779
+ "action_model.model.single_blocks.4.p_mlp_proj.bias": "model-00003-of-00004.safetensors",
780
+ "action_model.model.single_blocks.4.p_mlp_proj.weight": "model-00003-of-00004.safetensors",
781
+ "action_model.model.single_blocks.4.p_q_norm.weight": "model-00003-of-00004.safetensors",
782
+ "action_model.model.single_blocks.4.q_norm.weight": "model-00003-of-00004.safetensors",
783
+ "action_model.model.single_blocks.5.k_norm.weight": "model-00003-of-00004.safetensors",
784
+ "action_model.model.single_blocks.5.linear1.bias": "model-00003-of-00004.safetensors",
785
+ "action_model.model.single_blocks.5.linear1.weight": "model-00003-of-00004.safetensors",
786
+ "action_model.model.single_blocks.5.linear2.bias": "model-00003-of-00004.safetensors",
787
+ "action_model.model.single_blocks.5.linear2.weight": "model-00003-of-00004.safetensors",
788
+ "action_model.model.single_blocks.5.mlp_proj.bias": "model-00003-of-00004.safetensors",
789
+ "action_model.model.single_blocks.5.mlp_proj.weight": "model-00003-of-00004.safetensors",
790
+ "action_model.model.single_blocks.5.p_k_norm.weight": "model-00003-of-00004.safetensors",
791
+ "action_model.model.single_blocks.5.p_linear1.bias": "model-00003-of-00004.safetensors",
792
+ "action_model.model.single_blocks.5.p_linear1.weight": "model-00003-of-00004.safetensors",
793
+ "action_model.model.single_blocks.5.p_linear2.bias": "model-00003-of-00004.safetensors",
794
+ "action_model.model.single_blocks.5.p_linear2.weight": "model-00003-of-00004.safetensors",
795
+ "action_model.model.single_blocks.5.p_mlp_proj.bias": "model-00003-of-00004.safetensors",
796
+ "action_model.model.single_blocks.5.p_mlp_proj.weight": "model-00003-of-00004.safetensors",
797
+ "action_model.model.single_blocks.5.p_q_norm.weight": "model-00003-of-00004.safetensors",
798
+ "action_model.model.single_blocks.5.q_norm.weight": "model-00003-of-00004.safetensors",
799
+ "action_model.model.single_blocks.6.k_norm.weight": "model-00003-of-00004.safetensors",
800
+ "action_model.model.single_blocks.6.linear1.bias": "model-00003-of-00004.safetensors",
801
+ "action_model.model.single_blocks.6.linear1.weight": "model-00003-of-00004.safetensors",
802
+ "action_model.model.single_blocks.6.linear2.bias": "model-00003-of-00004.safetensors",
803
+ "action_model.model.single_blocks.6.linear2.weight": "model-00003-of-00004.safetensors",
804
+ "action_model.model.single_blocks.6.mlp_proj.bias": "model-00003-of-00004.safetensors",
805
+ "action_model.model.single_blocks.6.mlp_proj.weight": "model-00003-of-00004.safetensors",
806
+ "action_model.model.single_blocks.6.p_k_norm.weight": "model-00003-of-00004.safetensors",
807
+ "action_model.model.single_blocks.6.p_linear1.bias": "model-00003-of-00004.safetensors",
808
+ "action_model.model.single_blocks.6.p_linear1.weight": "model-00003-of-00004.safetensors",
809
+ "action_model.model.single_blocks.6.p_linear2.bias": "model-00003-of-00004.safetensors",
810
+ "action_model.model.single_blocks.6.p_linear2.weight": "model-00003-of-00004.safetensors",
811
+ "action_model.model.single_blocks.6.p_mlp_proj.bias": "model-00003-of-00004.safetensors",
812
+ "action_model.model.single_blocks.6.p_mlp_proj.weight": "model-00003-of-00004.safetensors",
813
+ "action_model.model.single_blocks.6.p_q_norm.weight": "model-00003-of-00004.safetensors",
814
+ "action_model.model.single_blocks.6.q_norm.weight": "model-00003-of-00004.safetensors",
815
+ "action_model.model.single_blocks.7.k_norm.weight": "model-00003-of-00004.safetensors",
816
+ "action_model.model.single_blocks.7.linear1.bias": "model-00003-of-00004.safetensors",
817
+ "action_model.model.single_blocks.7.linear1.weight": "model-00003-of-00004.safetensors",
818
+ "action_model.model.single_blocks.7.linear2.bias": "model-00003-of-00004.safetensors",
819
+ "action_model.model.single_blocks.7.linear2.weight": "model-00003-of-00004.safetensors",
820
+ "action_model.model.single_blocks.7.mlp_proj.bias": "model-00003-of-00004.safetensors",
821
+ "action_model.model.single_blocks.7.mlp_proj.weight": "model-00003-of-00004.safetensors",
822
+ "action_model.model.single_blocks.7.p_k_norm.weight": "model-00003-of-00004.safetensors",
823
+ "action_model.model.single_blocks.7.p_linear1.bias": "model-00003-of-00004.safetensors",
824
+ "action_model.model.single_blocks.7.p_linear1.weight": "model-00003-of-00004.safetensors",
825
+ "action_model.model.single_blocks.7.p_linear2.bias": "model-00003-of-00004.safetensors",
826
+ "action_model.model.single_blocks.7.p_linear2.weight": "model-00003-of-00004.safetensors",
827
+ "action_model.model.single_blocks.7.p_mlp_proj.bias": "model-00003-of-00004.safetensors",
828
+ "action_model.model.single_blocks.7.p_mlp_proj.weight": "model-00003-of-00004.safetensors",
829
+ "action_model.model.single_blocks.7.p_q_norm.weight": "model-00003-of-00004.safetensors",
830
+ "action_model.model.single_blocks.7.q_norm.weight": "model-00003-of-00004.safetensors",
831
+ "action_model.model.timestep_encoder.timestep_embedder.linear_1.bias": "model-00003-of-00004.safetensors",
832
+ "action_model.model.timestep_encoder.timestep_embedder.linear_1.weight": "model-00003-of-00004.safetensors",
833
+ "action_model.model.timestep_encoder.timestep_embedder.linear_2.bias": "model-00003-of-00004.safetensors",
834
+ "action_model.model.timestep_encoder.timestep_embedder.linear_2.weight": "model-00003-of-00004.safetensors",
835
+ "action_model.model.vl_proj_to_sa.bias": "model-00003-of-00004.safetensors",
836
+ "action_model.model.vl_proj_to_sa.weight": "model-00003-of-00004.safetensors",
837
+ "action_model.physics.physics_mask_token": "model-00003-of-00004.safetensors",
838
+ "action_model.state_encoder.layer1.W": "model-00003-of-00004.safetensors",
839
+ "action_model.state_encoder.layer1.b": "model-00003-of-00004.safetensors",
840
+ "backbone.qwen_model.lm_head.weight": "model-00003-of-00004.safetensors",
841
+ "action_model.action_decoder.layer1.W": "model-00004-of-00004.safetensors",
842
+ "action_model.action_decoder.layer1.b": "model-00004-of-00004.safetensors",
843
+ "action_model.action_decoder.layer2.W": "model-00004-of-00004.safetensors",
844
+ "action_model.action_decoder.layer2.b": "model-00004-of-00004.safetensors",
845
+ "action_model.action_encoder.W1.W": "model-00004-of-00004.safetensors",
846
+ "action_model.action_encoder.W1.b": "model-00004-of-00004.safetensors",
847
+ "action_model.action_encoder.W2.W": "model-00004-of-00004.safetensors",
848
+ "action_model.action_encoder.W2.b": "model-00004-of-00004.safetensors",
849
+ "action_model.action_encoder.W3.W": "model-00004-of-00004.safetensors",
850
+ "action_model.action_encoder.W3.b": "model-00004-of-00004.safetensors",
851
+ "action_model.physics.physics_cond_encoder.W1.bias": "model-00004-of-00004.safetensors",
852
+ "action_model.physics.physics_cond_encoder.W1.weight": "model-00004-of-00004.safetensors",
853
+ "action_model.physics.physics_cond_encoder.W2.bias": "model-00004-of-00004.safetensors",
854
+ "action_model.physics.physics_cond_encoder.W2.weight": "model-00004-of-00004.safetensors",
855
+ "action_model.physics.physics_cond_encoder.W3.bias": "model-00004-of-00004.safetensors",
856
+ "action_model.physics.physics_cond_encoder.W3.weight": "model-00004-of-00004.safetensors",
857
+ "action_model.physics.physics_decoder.net.0.bias": "model-00004-of-00004.safetensors",
858
+ "action_model.physics.physics_decoder.net.0.weight": "model-00004-of-00004.safetensors",
859
+ "action_model.physics.physics_decoder.net.2.bias": "model-00004-of-00004.safetensors",
860
+ "action_model.physics.physics_decoder.net.2.weight": "model-00004-of-00004.safetensors",
861
+ "action_model.physics.physics_fut_encoder.W1.bias": "model-00004-of-00004.safetensors",
862
+ "action_model.physics.physics_fut_encoder.W1.weight": "model-00004-of-00004.safetensors",
863
+ "action_model.physics.physics_fut_encoder.W2.bias": "model-00004-of-00004.safetensors",
864
+ "action_model.physics.physics_fut_encoder.W2.weight": "model-00004-of-00004.safetensors",
865
+ "action_model.physics.physics_fut_encoder.W3.bias": "model-00004-of-00004.safetensors",
866
+ "action_model.physics.physics_fut_encoder.W3.weight": "model-00004-of-00004.safetensors",
867
+ "action_model.position_embedding.weight": "model-00004-of-00004.safetensors",
868
+ "action_model.state_encoder.layer2.W": "model-00004-of-00004.safetensors",
869
+ "action_model.state_encoder.layer2.b": "model-00004-of-00004.safetensors",
870
+ "memory.layers.0.input_layernorm.weight": "model-00004-of-00004.safetensors",
871
+ "memory.layers.0.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
872
+ "memory.layers.0.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
873
+ "memory.layers.0.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
874
+ "memory.layers.0.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
875
+ "memory.layers.0.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
876
+ "memory.layers.0.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
877
+ "memory.layers.0.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
878
+ "memory.layers.0.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
879
+ "memory.layers.1.input_layernorm.weight": "model-00004-of-00004.safetensors",
880
+ "memory.layers.1.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
881
+ "memory.layers.1.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
882
+ "memory.layers.1.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
883
+ "memory.layers.1.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
884
+ "memory.layers.1.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
885
+ "memory.layers.1.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
886
+ "memory.layers.1.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
887
+ "memory.layers.1.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
888
+ "memory.norm.weight": "model-00004-of-00004.safetensors"
889
+ }
890
+ }
processor_config.json ADDED
@@ -0,0 +1,3048 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "processor_class": "RLDXProcessor",
3
+ "processor_kwargs": {
4
+ "modality_configs": {
5
+ "bridge_orig": {
6
+ "video": {
7
+ "delta_indices": [
8
+ -6,
9
+ -4,
10
+ -2,
11
+ 0
12
+ ],
13
+ "modality_keys": [
14
+ "primary",
15
+ "secondary"
16
+ ],
17
+ "sin_cos_embedding_keys": null,
18
+ "mean_std_embedding_keys": null,
19
+ "action_configs": null
20
+ },
21
+ "state": {
22
+ "delta_indices": [
23
+ 0
24
+ ],
25
+ "modality_keys": [
26
+ "end_effector_position",
27
+ "end_effector_rotation",
28
+ "gripper_position"
29
+ ],
30
+ "sin_cos_embedding_keys": null,
31
+ "mean_std_embedding_keys": null,
32
+ "action_configs": null
33
+ },
34
+ "action": {
35
+ "delta_indices": [
36
+ 0,
37
+ 1,
38
+ 2,
39
+ 3,
40
+ 4,
41
+ 5,
42
+ 6,
43
+ 7,
44
+ 8,
45
+ 9,
46
+ 10,
47
+ 11,
48
+ 12,
49
+ 13,
50
+ 14,
51
+ 15
52
+ ],
53
+ "modality_keys": [
54
+ "end_effector_position",
55
+ "end_effector_rotation",
56
+ "gripper_close"
57
+ ],
58
+ "sin_cos_embedding_keys": null,
59
+ "mean_std_embedding_keys": null,
60
+ "action_configs": [
61
+ {
62
+ "rep": "DELTA",
63
+ "type": "EEF",
64
+ "format": "DEFAULT",
65
+ "state_key": null
66
+ },
67
+ {
68
+ "rep": "DELTA",
69
+ "type": "EEF",
70
+ "format": "DEFAULT",
71
+ "state_key": null
72
+ },
73
+ {
74
+ "rep": "ABSOLUTE",
75
+ "type": "NON_EEF",
76
+ "format": "DEFAULT",
77
+ "state_key": null
78
+ }
79
+ ]
80
+ },
81
+ "language": {
82
+ "delta_indices": [
83
+ 0
84
+ ],
85
+ "modality_keys": [
86
+ "annotation.human.action.task_description"
87
+ ],
88
+ "sin_cos_embedding_keys": null,
89
+ "mean_std_embedding_keys": null,
90
+ "action_configs": null
91
+ }
92
+ },
93
+ "iamlab_cmu_pickup_insert_converted_externally_to_rlds": {
94
+ "video": {
95
+ "delta_indices": [
96
+ -6,
97
+ -4,
98
+ -2,
99
+ 0
100
+ ],
101
+ "modality_keys": [
102
+ "primary",
103
+ "wrist"
104
+ ],
105
+ "sin_cos_embedding_keys": null,
106
+ "mean_std_embedding_keys": null,
107
+ "action_configs": null
108
+ },
109
+ "state": {
110
+ "delta_indices": [
111
+ 0
112
+ ],
113
+ "modality_keys": [
114
+ "end_effector_position",
115
+ "end_effector_rotation",
116
+ "gripper_position"
117
+ ],
118
+ "sin_cos_embedding_keys": null,
119
+ "mean_std_embedding_keys": null,
120
+ "action_configs": null
121
+ },
122
+ "action": {
123
+ "delta_indices": [
124
+ 0,
125
+ 1,
126
+ 2,
127
+ 3,
128
+ 4,
129
+ 5,
130
+ 6,
131
+ 7,
132
+ 8,
133
+ 9,
134
+ 10,
135
+ 11,
136
+ 12,
137
+ 13,
138
+ 14,
139
+ 15
140
+ ],
141
+ "modality_keys": [
142
+ "end_effector_position",
143
+ "end_effector_rotation",
144
+ "gripper_close"
145
+ ],
146
+ "sin_cos_embedding_keys": null,
147
+ "mean_std_embedding_keys": null,
148
+ "action_configs": [
149
+ {
150
+ "rep": "DELTA",
151
+ "type": "EEF",
152
+ "format": "DEFAULT",
153
+ "state_key": null
154
+ },
155
+ {
156
+ "rep": "DELTA",
157
+ "type": "EEF",
158
+ "format": "DEFAULT",
159
+ "state_key": null
160
+ },
161
+ {
162
+ "rep": "ABSOLUTE",
163
+ "type": "NON_EEF",
164
+ "format": "DEFAULT",
165
+ "state_key": null
166
+ }
167
+ ]
168
+ },
169
+ "language": {
170
+ "delta_indices": [
171
+ 0
172
+ ],
173
+ "modality_keys": [
174
+ "annotation.human.action.task_description"
175
+ ],
176
+ "sin_cos_embedding_keys": null,
177
+ "mean_std_embedding_keys": null,
178
+ "action_configs": null
179
+ }
180
+ },
181
+ "humanoid_everyday_g1": {
182
+ "video": {
183
+ "delta_indices": [
184
+ -6,
185
+ -4,
186
+ -2,
187
+ 0
188
+ ],
189
+ "modality_keys": [
190
+ "egocentric_resized"
191
+ ],
192
+ "sin_cos_embedding_keys": null,
193
+ "mean_std_embedding_keys": null,
194
+ "action_configs": null
195
+ },
196
+ "state": {
197
+ "delta_indices": [
198
+ 0
199
+ ],
200
+ "modality_keys": [
201
+ "left_arm",
202
+ "left_hand",
203
+ "right_arm",
204
+ "right_hand"
205
+ ],
206
+ "sin_cos_embedding_keys": null,
207
+ "mean_std_embedding_keys": null,
208
+ "action_configs": null
209
+ },
210
+ "action": {
211
+ "delta_indices": [
212
+ 0,
213
+ 1,
214
+ 2,
215
+ 3,
216
+ 4,
217
+ 5,
218
+ 6,
219
+ 7,
220
+ 8,
221
+ 9,
222
+ 10,
223
+ 11,
224
+ 12,
225
+ 13,
226
+ 14,
227
+ 15
228
+ ],
229
+ "modality_keys": [
230
+ "left_arm",
231
+ "left_hand",
232
+ "right_arm",
233
+ "right_hand"
234
+ ],
235
+ "sin_cos_embedding_keys": null,
236
+ "mean_std_embedding_keys": null,
237
+ "action_configs": [
238
+ {
239
+ "rep": "ABSOLUTE",
240
+ "type": "NON_EEF",
241
+ "format": "DEFAULT",
242
+ "state_key": null
243
+ },
244
+ {
245
+ "rep": "ABSOLUTE",
246
+ "type": "NON_EEF",
247
+ "format": "DEFAULT",
248
+ "state_key": null
249
+ },
250
+ {
251
+ "rep": "ABSOLUTE",
252
+ "type": "NON_EEF",
253
+ "format": "DEFAULT",
254
+ "state_key": null
255
+ },
256
+ {
257
+ "rep": "ABSOLUTE",
258
+ "type": "NON_EEF",
259
+ "format": "DEFAULT",
260
+ "state_key": null
261
+ }
262
+ ]
263
+ },
264
+ "language": {
265
+ "delta_indices": [
266
+ 0
267
+ ],
268
+ "modality_keys": [
269
+ "annotation.human.action.task_description"
270
+ ],
271
+ "sin_cos_embedding_keys": null,
272
+ "mean_std_embedding_keys": null,
273
+ "action_configs": null
274
+ }
275
+ },
276
+ "dlr_edan_shared_control_converted_externally_to_rlds": {
277
+ "video": {
278
+ "delta_indices": [
279
+ -6,
280
+ -4,
281
+ -2,
282
+ 0
283
+ ],
284
+ "modality_keys": [
285
+ "primary"
286
+ ],
287
+ "sin_cos_embedding_keys": null,
288
+ "mean_std_embedding_keys": null,
289
+ "action_configs": null
290
+ },
291
+ "state": {
292
+ "delta_indices": [
293
+ 0
294
+ ],
295
+ "modality_keys": [
296
+ "end_effector_position",
297
+ "end_effector_rotation",
298
+ "gripper_position"
299
+ ],
300
+ "sin_cos_embedding_keys": null,
301
+ "mean_std_embedding_keys": null,
302
+ "action_configs": null
303
+ },
304
+ "action": {
305
+ "delta_indices": [
306
+ 0,
307
+ 1,
308
+ 2,
309
+ 3,
310
+ 4,
311
+ 5,
312
+ 6,
313
+ 7,
314
+ 8,
315
+ 9,
316
+ 10,
317
+ 11,
318
+ 12,
319
+ 13,
320
+ 14,
321
+ 15
322
+ ],
323
+ "modality_keys": [
324
+ "end_effector_position",
325
+ "end_effector_rotation",
326
+ "gripper_close"
327
+ ],
328
+ "sin_cos_embedding_keys": null,
329
+ "mean_std_embedding_keys": null,
330
+ "action_configs": [
331
+ {
332
+ "rep": "DELTA",
333
+ "type": "EEF",
334
+ "format": "DEFAULT",
335
+ "state_key": null
336
+ },
337
+ {
338
+ "rep": "DELTA",
339
+ "type": "EEF",
340
+ "format": "DEFAULT",
341
+ "state_key": null
342
+ },
343
+ {
344
+ "rep": "ABSOLUTE",
345
+ "type": "NON_EEF",
346
+ "format": "DEFAULT",
347
+ "state_key": null
348
+ }
349
+ ]
350
+ },
351
+ "language": {
352
+ "delta_indices": [
353
+ 0
354
+ ],
355
+ "modality_keys": [
356
+ "annotation.human.action.task_description"
357
+ ],
358
+ "sin_cos_embedding_keys": null,
359
+ "mean_std_embedding_keys": null,
360
+ "action_configs": null
361
+ }
362
+ },
363
+ "austin_sailor_dataset_converted_externally_to_rlds": {
364
+ "video": {
365
+ "delta_indices": [
366
+ -6,
367
+ -4,
368
+ -2,
369
+ 0
370
+ ],
371
+ "modality_keys": [
372
+ "primary",
373
+ "wrist"
374
+ ],
375
+ "sin_cos_embedding_keys": null,
376
+ "mean_std_embedding_keys": null,
377
+ "action_configs": null
378
+ },
379
+ "state": {
380
+ "delta_indices": [
381
+ 0
382
+ ],
383
+ "modality_keys": [
384
+ "end_effector_position",
385
+ "end_effector_rotation",
386
+ "gripper_position"
387
+ ],
388
+ "sin_cos_embedding_keys": null,
389
+ "mean_std_embedding_keys": null,
390
+ "action_configs": null
391
+ },
392
+ "action": {
393
+ "delta_indices": [
394
+ 0,
395
+ 1,
396
+ 2,
397
+ 3,
398
+ 4,
399
+ 5,
400
+ 6,
401
+ 7,
402
+ 8,
403
+ 9,
404
+ 10,
405
+ 11,
406
+ 12,
407
+ 13,
408
+ 14,
409
+ 15
410
+ ],
411
+ "modality_keys": [
412
+ "end_effector_position",
413
+ "end_effector_rotation",
414
+ "gripper_close"
415
+ ],
416
+ "sin_cos_embedding_keys": null,
417
+ "mean_std_embedding_keys": null,
418
+ "action_configs": [
419
+ {
420
+ "rep": "DELTA",
421
+ "type": "EEF",
422
+ "format": "DEFAULT",
423
+ "state_key": null
424
+ },
425
+ {
426
+ "rep": "DELTA",
427
+ "type": "EEF",
428
+ "format": "DEFAULT",
429
+ "state_key": null
430
+ },
431
+ {
432
+ "rep": "ABSOLUTE",
433
+ "type": "NON_EEF",
434
+ "format": "DEFAULT",
435
+ "state_key": null
436
+ }
437
+ ]
438
+ },
439
+ "language": {
440
+ "delta_indices": [
441
+ 0
442
+ ],
443
+ "modality_keys": [
444
+ "annotation.human.action.task_description"
445
+ ],
446
+ "sin_cos_embedding_keys": null,
447
+ "mean_std_embedding_keys": null,
448
+ "action_configs": null
449
+ }
450
+ },
451
+ "berkeley_autolab_ur5": {
452
+ "video": {
453
+ "delta_indices": [
454
+ -6,
455
+ -4,
456
+ -2,
457
+ 0
458
+ ],
459
+ "modality_keys": [
460
+ "primary",
461
+ "wrist"
462
+ ],
463
+ "sin_cos_embedding_keys": null,
464
+ "mean_std_embedding_keys": null,
465
+ "action_configs": null
466
+ },
467
+ "state": {
468
+ "delta_indices": [
469
+ 0
470
+ ],
471
+ "modality_keys": [
472
+ "end_effector_position",
473
+ "end_effector_rotation",
474
+ "gripper_position"
475
+ ],
476
+ "sin_cos_embedding_keys": null,
477
+ "mean_std_embedding_keys": null,
478
+ "action_configs": null
479
+ },
480
+ "action": {
481
+ "delta_indices": [
482
+ 0,
483
+ 1,
484
+ 2,
485
+ 3,
486
+ 4,
487
+ 5,
488
+ 6,
489
+ 7,
490
+ 8,
491
+ 9,
492
+ 10,
493
+ 11,
494
+ 12,
495
+ 13,
496
+ 14,
497
+ 15
498
+ ],
499
+ "modality_keys": [
500
+ "end_effector_position",
501
+ "end_effector_rotation",
502
+ "gripper_close"
503
+ ],
504
+ "sin_cos_embedding_keys": null,
505
+ "mean_std_embedding_keys": null,
506
+ "action_configs": [
507
+ {
508
+ "rep": "DELTA",
509
+ "type": "EEF",
510
+ "format": "DEFAULT",
511
+ "state_key": null
512
+ },
513
+ {
514
+ "rep": "DELTA",
515
+ "type": "EEF",
516
+ "format": "DEFAULT",
517
+ "state_key": null
518
+ },
519
+ {
520
+ "rep": "ABSOLUTE",
521
+ "type": "NON_EEF",
522
+ "format": "DEFAULT",
523
+ "state_key": null
524
+ }
525
+ ]
526
+ },
527
+ "language": {
528
+ "delta_indices": [
529
+ 0
530
+ ],
531
+ "modality_keys": [
532
+ "annotation.human.action.task_description"
533
+ ],
534
+ "sin_cos_embedding_keys": null,
535
+ "mean_std_embedding_keys": null,
536
+ "action_configs": null
537
+ }
538
+ },
539
+ "fractal20220817_data": {
540
+ "video": {
541
+ "delta_indices": [
542
+ -6,
543
+ -4,
544
+ -2,
545
+ 0
546
+ ],
547
+ "modality_keys": [
548
+ "primary"
549
+ ],
550
+ "sin_cos_embedding_keys": null,
551
+ "mean_std_embedding_keys": null,
552
+ "action_configs": null
553
+ },
554
+ "state": {
555
+ "delta_indices": [
556
+ 0
557
+ ],
558
+ "modality_keys": [
559
+ "end_effector_position",
560
+ "end_effector_rotation",
561
+ "gripper_position"
562
+ ],
563
+ "sin_cos_embedding_keys": null,
564
+ "mean_std_embedding_keys": null,
565
+ "action_configs": null
566
+ },
567
+ "action": {
568
+ "delta_indices": [
569
+ 0,
570
+ 1,
571
+ 2,
572
+ 3,
573
+ 4,
574
+ 5,
575
+ 6,
576
+ 7,
577
+ 8,
578
+ 9,
579
+ 10,
580
+ 11,
581
+ 12,
582
+ 13,
583
+ 14,
584
+ 15
585
+ ],
586
+ "modality_keys": [
587
+ "end_effector_position",
588
+ "end_effector_rotation",
589
+ "gripper_close"
590
+ ],
591
+ "sin_cos_embedding_keys": null,
592
+ "mean_std_embedding_keys": null,
593
+ "action_configs": [
594
+ {
595
+ "rep": "DELTA",
596
+ "type": "EEF",
597
+ "format": "DEFAULT",
598
+ "state_key": null
599
+ },
600
+ {
601
+ "rep": "DELTA",
602
+ "type": "EEF",
603
+ "format": "DEFAULT",
604
+ "state_key": null
605
+ },
606
+ {
607
+ "rep": "ABSOLUTE",
608
+ "type": "NON_EEF",
609
+ "format": "DEFAULT",
610
+ "state_key": null
611
+ }
612
+ ]
613
+ },
614
+ "language": {
615
+ "delta_indices": [
616
+ 0
617
+ ],
618
+ "modality_keys": [
619
+ "annotation.human.action.task_description"
620
+ ],
621
+ "sin_cos_embedding_keys": null,
622
+ "mean_std_embedding_keys": null,
623
+ "action_configs": null
624
+ }
625
+ },
626
+ "cmu_stretch": {
627
+ "video": {
628
+ "delta_indices": [
629
+ -6,
630
+ -4,
631
+ -2,
632
+ 0
633
+ ],
634
+ "modality_keys": [
635
+ "primary"
636
+ ],
637
+ "sin_cos_embedding_keys": null,
638
+ "mean_std_embedding_keys": null,
639
+ "action_configs": null
640
+ },
641
+ "state": {
642
+ "delta_indices": [
643
+ 0
644
+ ],
645
+ "modality_keys": [
646
+ "end_effector_position",
647
+ "end_effector_rotation",
648
+ "gripper_position"
649
+ ],
650
+ "sin_cos_embedding_keys": null,
651
+ "mean_std_embedding_keys": null,
652
+ "action_configs": null
653
+ },
654
+ "action": {
655
+ "delta_indices": [
656
+ 0,
657
+ 1,
658
+ 2,
659
+ 3,
660
+ 4,
661
+ 5,
662
+ 6,
663
+ 7,
664
+ 8,
665
+ 9,
666
+ 10,
667
+ 11,
668
+ 12,
669
+ 13,
670
+ 14,
671
+ 15
672
+ ],
673
+ "modality_keys": [
674
+ "end_effector_position",
675
+ "end_effector_rotation",
676
+ "gripper_close"
677
+ ],
678
+ "sin_cos_embedding_keys": null,
679
+ "mean_std_embedding_keys": null,
680
+ "action_configs": [
681
+ {
682
+ "rep": "DELTA",
683
+ "type": "EEF",
684
+ "format": "DEFAULT",
685
+ "state_key": null
686
+ },
687
+ {
688
+ "rep": "DELTA",
689
+ "type": "EEF",
690
+ "format": "DEFAULT",
691
+ "state_key": null
692
+ },
693
+ {
694
+ "rep": "ABSOLUTE",
695
+ "type": "NON_EEF",
696
+ "format": "DEFAULT",
697
+ "state_key": null
698
+ }
699
+ ]
700
+ },
701
+ "language": {
702
+ "delta_indices": [
703
+ 0
704
+ ],
705
+ "modality_keys": [
706
+ "annotation.human.action.task_description"
707
+ ],
708
+ "sin_cos_embedding_keys": null,
709
+ "mean_std_embedding_keys": null,
710
+ "action_configs": null
711
+ }
712
+ },
713
+ "berkeley_cable_routing": {
714
+ "video": {
715
+ "delta_indices": [
716
+ -6,
717
+ -4,
718
+ -2,
719
+ 0
720
+ ],
721
+ "modality_keys": [
722
+ "primary",
723
+ "secondary",
724
+ "wrist"
725
+ ],
726
+ "sin_cos_embedding_keys": null,
727
+ "mean_std_embedding_keys": null,
728
+ "action_configs": null
729
+ },
730
+ "state": {
731
+ "delta_indices": [
732
+ 0
733
+ ],
734
+ "modality_keys": [
735
+ "joint_position"
736
+ ],
737
+ "sin_cos_embedding_keys": null,
738
+ "mean_std_embedding_keys": null,
739
+ "action_configs": null
740
+ },
741
+ "action": {
742
+ "delta_indices": [
743
+ 0,
744
+ 1,
745
+ 2,
746
+ 3,
747
+ 4,
748
+ 5,
749
+ 6,
750
+ 7,
751
+ 8,
752
+ 9,
753
+ 10,
754
+ 11,
755
+ 12,
756
+ 13,
757
+ 14,
758
+ 15
759
+ ],
760
+ "modality_keys": [
761
+ "end_effector_position",
762
+ "end_effector_rotation",
763
+ "gripper_close"
764
+ ],
765
+ "sin_cos_embedding_keys": null,
766
+ "mean_std_embedding_keys": null,
767
+ "action_configs": [
768
+ {
769
+ "rep": "DELTA",
770
+ "type": "EEF",
771
+ "format": "DEFAULT",
772
+ "state_key": null
773
+ },
774
+ {
775
+ "rep": "DELTA",
776
+ "type": "EEF",
777
+ "format": "DEFAULT",
778
+ "state_key": null
779
+ },
780
+ {
781
+ "rep": "ABSOLUTE",
782
+ "type": "NON_EEF",
783
+ "format": "DEFAULT",
784
+ "state_key": null
785
+ }
786
+ ]
787
+ },
788
+ "language": {
789
+ "delta_indices": [
790
+ 0
791
+ ],
792
+ "modality_keys": [
793
+ "annotation.human.action.task_description"
794
+ ],
795
+ "sin_cos_embedding_keys": null,
796
+ "mean_std_embedding_keys": null,
797
+ "action_configs": null
798
+ }
799
+ },
800
+ "stanford_hydra_dataset_converted_externally_to_rlds": {
801
+ "video": {
802
+ "delta_indices": [
803
+ -6,
804
+ -4,
805
+ -2,
806
+ 0
807
+ ],
808
+ "modality_keys": [
809
+ "primary",
810
+ "wrist"
811
+ ],
812
+ "sin_cos_embedding_keys": null,
813
+ "mean_std_embedding_keys": null,
814
+ "action_configs": null
815
+ },
816
+ "state": {
817
+ "delta_indices": [
818
+ 0
819
+ ],
820
+ "modality_keys": [
821
+ "end_effector_position",
822
+ "end_effector_rotation",
823
+ "gripper_position"
824
+ ],
825
+ "sin_cos_embedding_keys": null,
826
+ "mean_std_embedding_keys": null,
827
+ "action_configs": null
828
+ },
829
+ "action": {
830
+ "delta_indices": [
831
+ 0,
832
+ 1,
833
+ 2,
834
+ 3,
835
+ 4,
836
+ 5,
837
+ 6,
838
+ 7,
839
+ 8,
840
+ 9,
841
+ 10,
842
+ 11,
843
+ 12,
844
+ 13,
845
+ 14,
846
+ 15
847
+ ],
848
+ "modality_keys": [
849
+ "end_effector_position",
850
+ "end_effector_rotation",
851
+ "gripper_close"
852
+ ],
853
+ "sin_cos_embedding_keys": null,
854
+ "mean_std_embedding_keys": null,
855
+ "action_configs": [
856
+ {
857
+ "rep": "DELTA",
858
+ "type": "EEF",
859
+ "format": "DEFAULT",
860
+ "state_key": null
861
+ },
862
+ {
863
+ "rep": "DELTA",
864
+ "type": "EEF",
865
+ "format": "DEFAULT",
866
+ "state_key": null
867
+ },
868
+ {
869
+ "rep": "ABSOLUTE",
870
+ "type": "NON_EEF",
871
+ "format": "DEFAULT",
872
+ "state_key": null
873
+ }
874
+ ]
875
+ },
876
+ "language": {
877
+ "delta_indices": [
878
+ 0
879
+ ],
880
+ "modality_keys": [
881
+ "annotation.human.action.task_description"
882
+ ],
883
+ "sin_cos_embedding_keys": null,
884
+ "mean_std_embedding_keys": null,
885
+ "action_configs": null
886
+ }
887
+ },
888
+ "utaustin_mutex": {
889
+ "video": {
890
+ "delta_indices": [
891
+ -6,
892
+ -4,
893
+ -2,
894
+ 0
895
+ ],
896
+ "modality_keys": [
897
+ "primary",
898
+ "wrist"
899
+ ],
900
+ "sin_cos_embedding_keys": null,
901
+ "mean_std_embedding_keys": null,
902
+ "action_configs": null
903
+ },
904
+ "state": {
905
+ "delta_indices": [
906
+ 0
907
+ ],
908
+ "modality_keys": [
909
+ "joint_position",
910
+ "gripper_position"
911
+ ],
912
+ "sin_cos_embedding_keys": null,
913
+ "mean_std_embedding_keys": null,
914
+ "action_configs": null
915
+ },
916
+ "action": {
917
+ "delta_indices": [
918
+ 0,
919
+ 1,
920
+ 2,
921
+ 3,
922
+ 4,
923
+ 5,
924
+ 6,
925
+ 7,
926
+ 8,
927
+ 9,
928
+ 10,
929
+ 11,
930
+ 12,
931
+ 13,
932
+ 14,
933
+ 15
934
+ ],
935
+ "modality_keys": [
936
+ "end_effector_position",
937
+ "end_effector_rotation",
938
+ "gripper_close"
939
+ ],
940
+ "sin_cos_embedding_keys": null,
941
+ "mean_std_embedding_keys": null,
942
+ "action_configs": [
943
+ {
944
+ "rep": "DELTA",
945
+ "type": "EEF",
946
+ "format": "DEFAULT",
947
+ "state_key": null
948
+ },
949
+ {
950
+ "rep": "DELTA",
951
+ "type": "EEF",
952
+ "format": "DEFAULT",
953
+ "state_key": null
954
+ },
955
+ {
956
+ "rep": "ABSOLUTE",
957
+ "type": "NON_EEF",
958
+ "format": "DEFAULT",
959
+ "state_key": null
960
+ }
961
+ ]
962
+ },
963
+ "language": {
964
+ "delta_indices": [
965
+ 0
966
+ ],
967
+ "modality_keys": [
968
+ "annotation.human.action.task_description"
969
+ ],
970
+ "sin_cos_embedding_keys": null,
971
+ "mean_std_embedding_keys": null,
972
+ "action_configs": null
973
+ }
974
+ },
975
+ "furniture_bench_dataset_converted_externally_to_rlds": {
976
+ "video": {
977
+ "delta_indices": [
978
+ -6,
979
+ -4,
980
+ -2,
981
+ 0
982
+ ],
983
+ "modality_keys": [
984
+ "primary",
985
+ "wrist"
986
+ ],
987
+ "sin_cos_embedding_keys": null,
988
+ "mean_std_embedding_keys": null,
989
+ "action_configs": null
990
+ },
991
+ "state": {
992
+ "delta_indices": [
993
+ 0
994
+ ],
995
+ "modality_keys": [
996
+ "end_effector_position",
997
+ "end_effector_rotation",
998
+ "gripper_position"
999
+ ],
1000
+ "sin_cos_embedding_keys": null,
1001
+ "mean_std_embedding_keys": null,
1002
+ "action_configs": null
1003
+ },
1004
+ "action": {
1005
+ "delta_indices": [
1006
+ 0,
1007
+ 1,
1008
+ 2,
1009
+ 3,
1010
+ 4,
1011
+ 5,
1012
+ 6,
1013
+ 7,
1014
+ 8,
1015
+ 9,
1016
+ 10,
1017
+ 11,
1018
+ 12,
1019
+ 13,
1020
+ 14,
1021
+ 15
1022
+ ],
1023
+ "modality_keys": [
1024
+ "end_effector_position",
1025
+ "end_effector_rotation",
1026
+ "gripper_close"
1027
+ ],
1028
+ "sin_cos_embedding_keys": null,
1029
+ "mean_std_embedding_keys": null,
1030
+ "action_configs": [
1031
+ {
1032
+ "rep": "DELTA",
1033
+ "type": "EEF",
1034
+ "format": "DEFAULT",
1035
+ "state_key": null
1036
+ },
1037
+ {
1038
+ "rep": "DELTA",
1039
+ "type": "EEF",
1040
+ "format": "DEFAULT",
1041
+ "state_key": null
1042
+ },
1043
+ {
1044
+ "rep": "ABSOLUTE",
1045
+ "type": "NON_EEF",
1046
+ "format": "DEFAULT",
1047
+ "state_key": null
1048
+ }
1049
+ ]
1050
+ },
1051
+ "language": {
1052
+ "delta_indices": [
1053
+ 0
1054
+ ],
1055
+ "modality_keys": [
1056
+ "annotation.human.action.task_description"
1057
+ ],
1058
+ "sin_cos_embedding_keys": null,
1059
+ "mean_std_embedding_keys": null,
1060
+ "action_configs": null
1061
+ }
1062
+ },
1063
+ "neural_gr1": {
1064
+ "video": {
1065
+ "delta_indices": [
1066
+ -6,
1067
+ -4,
1068
+ -2,
1069
+ 0
1070
+ ],
1071
+ "modality_keys": [
1072
+ "ego_view"
1073
+ ],
1074
+ "sin_cos_embedding_keys": null,
1075
+ "mean_std_embedding_keys": null,
1076
+ "action_configs": null
1077
+ },
1078
+ "state": {
1079
+ "delta_indices": [
1080
+ 0
1081
+ ],
1082
+ "modality_keys": [
1083
+ "left_arm",
1084
+ "left_hand",
1085
+ "left_leg",
1086
+ "neck",
1087
+ "right_arm",
1088
+ "right_hand",
1089
+ "right_leg",
1090
+ "waist"
1091
+ ],
1092
+ "sin_cos_embedding_keys": null,
1093
+ "mean_std_embedding_keys": null,
1094
+ "action_configs": null
1095
+ },
1096
+ "action": {
1097
+ "delta_indices": [
1098
+ 0,
1099
+ 1,
1100
+ 2,
1101
+ 3,
1102
+ 4,
1103
+ 5,
1104
+ 6,
1105
+ 7,
1106
+ 8,
1107
+ 9,
1108
+ 10,
1109
+ 11,
1110
+ 12,
1111
+ 13,
1112
+ 14,
1113
+ 15
1114
+ ],
1115
+ "modality_keys": [
1116
+ "left_arm",
1117
+ "left_hand",
1118
+ "left_leg",
1119
+ "neck",
1120
+ "right_arm",
1121
+ "right_hand",
1122
+ "right_leg",
1123
+ "waist"
1124
+ ],
1125
+ "sin_cos_embedding_keys": null,
1126
+ "mean_std_embedding_keys": null,
1127
+ "action_configs": [
1128
+ {
1129
+ "rep": "ABSOLUTE",
1130
+ "type": "NON_EEF",
1131
+ "format": "DEFAULT",
1132
+ "state_key": null
1133
+ },
1134
+ {
1135
+ "rep": "ABSOLUTE",
1136
+ "type": "NON_EEF",
1137
+ "format": "DEFAULT",
1138
+ "state_key": null
1139
+ },
1140
+ {
1141
+ "rep": "ABSOLUTE",
1142
+ "type": "NON_EEF",
1143
+ "format": "DEFAULT",
1144
+ "state_key": null
1145
+ },
1146
+ {
1147
+ "rep": "ABSOLUTE",
1148
+ "type": "NON_EEF",
1149
+ "format": "DEFAULT",
1150
+ "state_key": null
1151
+ },
1152
+ {
1153
+ "rep": "ABSOLUTE",
1154
+ "type": "NON_EEF",
1155
+ "format": "DEFAULT",
1156
+ "state_key": null
1157
+ },
1158
+ {
1159
+ "rep": "ABSOLUTE",
1160
+ "type": "NON_EEF",
1161
+ "format": "DEFAULT",
1162
+ "state_key": null
1163
+ },
1164
+ {
1165
+ "rep": "ABSOLUTE",
1166
+ "type": "NON_EEF",
1167
+ "format": "DEFAULT",
1168
+ "state_key": null
1169
+ },
1170
+ {
1171
+ "rep": "ABSOLUTE",
1172
+ "type": "NON_EEF",
1173
+ "format": "DEFAULT",
1174
+ "state_key": null
1175
+ }
1176
+ ]
1177
+ },
1178
+ "language": {
1179
+ "delta_indices": [
1180
+ 0
1181
+ ],
1182
+ "modality_keys": [
1183
+ "annotation.human.action.task_description"
1184
+ ],
1185
+ "sin_cos_embedding_keys": null,
1186
+ "mean_std_embedding_keys": null,
1187
+ "action_configs": null
1188
+ }
1189
+ },
1190
+ "agibot_gripper": {
1191
+ "video": {
1192
+ "delta_indices": [
1193
+ -6,
1194
+ -4,
1195
+ -2,
1196
+ 0
1197
+ ],
1198
+ "modality_keys": [
1199
+ "primary",
1200
+ "wrist_left",
1201
+ "wrist_right"
1202
+ ],
1203
+ "sin_cos_embedding_keys": null,
1204
+ "mean_std_embedding_keys": null,
1205
+ "action_configs": null
1206
+ },
1207
+ "state": {
1208
+ "delta_indices": [
1209
+ 0
1210
+ ],
1211
+ "modality_keys": [
1212
+ "state"
1213
+ ],
1214
+ "sin_cos_embedding_keys": null,
1215
+ "mean_std_embedding_keys": null,
1216
+ "action_configs": null
1217
+ },
1218
+ "action": {
1219
+ "delta_indices": [
1220
+ 0,
1221
+ 1,
1222
+ 2,
1223
+ 3,
1224
+ 4,
1225
+ 5,
1226
+ 6,
1227
+ 7,
1228
+ 8,
1229
+ 9,
1230
+ 10,
1231
+ 11,
1232
+ 12,
1233
+ 13,
1234
+ 14,
1235
+ 15
1236
+ ],
1237
+ "modality_keys": [
1238
+ "action"
1239
+ ],
1240
+ "sin_cos_embedding_keys": null,
1241
+ "mean_std_embedding_keys": null,
1242
+ "action_configs": [
1243
+ {
1244
+ "rep": "ABSOLUTE",
1245
+ "type": "NON_EEF",
1246
+ "format": "DEFAULT",
1247
+ "state_key": null
1248
+ }
1249
+ ]
1250
+ },
1251
+ "language": {
1252
+ "delta_indices": [
1253
+ 0
1254
+ ],
1255
+ "modality_keys": [
1256
+ "annotation.human.action.task_description"
1257
+ ],
1258
+ "sin_cos_embedding_keys": null,
1259
+ "mean_std_embedding_keys": null,
1260
+ "action_configs": null
1261
+ }
1262
+ },
1263
+ "fmb_dataset": {
1264
+ "video": {
1265
+ "delta_indices": [
1266
+ -6,
1267
+ -4,
1268
+ -2,
1269
+ 0
1270
+ ],
1271
+ "modality_keys": [
1272
+ "primary",
1273
+ "secondary",
1274
+ "wrist"
1275
+ ],
1276
+ "sin_cos_embedding_keys": null,
1277
+ "mean_std_embedding_keys": null,
1278
+ "action_configs": null
1279
+ },
1280
+ "state": {
1281
+ "delta_indices": [
1282
+ 0
1283
+ ],
1284
+ "modality_keys": [
1285
+ "end_effector_position",
1286
+ "end_effector_rotation",
1287
+ "gripper_position"
1288
+ ],
1289
+ "sin_cos_embedding_keys": null,
1290
+ "mean_std_embedding_keys": null,
1291
+ "action_configs": null
1292
+ },
1293
+ "action": {
1294
+ "delta_indices": [
1295
+ 0,
1296
+ 1,
1297
+ 2,
1298
+ 3,
1299
+ 4,
1300
+ 5,
1301
+ 6,
1302
+ 7,
1303
+ 8,
1304
+ 9,
1305
+ 10,
1306
+ 11,
1307
+ 12,
1308
+ 13,
1309
+ 14,
1310
+ 15
1311
+ ],
1312
+ "modality_keys": [
1313
+ "end_effector_position",
1314
+ "end_effector_rotation",
1315
+ "gripper_close"
1316
+ ],
1317
+ "sin_cos_embedding_keys": null,
1318
+ "mean_std_embedding_keys": null,
1319
+ "action_configs": [
1320
+ {
1321
+ "rep": "DELTA",
1322
+ "type": "EEF",
1323
+ "format": "DEFAULT",
1324
+ "state_key": null
1325
+ },
1326
+ {
1327
+ "rep": "DELTA",
1328
+ "type": "EEF",
1329
+ "format": "DEFAULT",
1330
+ "state_key": null
1331
+ },
1332
+ {
1333
+ "rep": "ABSOLUTE",
1334
+ "type": "NON_EEF",
1335
+ "format": "DEFAULT",
1336
+ "state_key": null
1337
+ }
1338
+ ]
1339
+ },
1340
+ "language": {
1341
+ "delta_indices": [
1342
+ 0
1343
+ ],
1344
+ "modality_keys": [
1345
+ "annotation.human.action.task_description"
1346
+ ],
1347
+ "sin_cos_embedding_keys": null,
1348
+ "mean_std_embedding_keys": null,
1349
+ "action_configs": null
1350
+ }
1351
+ },
1352
+ "dobbe": {
1353
+ "video": {
1354
+ "delta_indices": [
1355
+ -6,
1356
+ -4,
1357
+ -2,
1358
+ 0
1359
+ ],
1360
+ "modality_keys": [
1361
+ "wrist"
1362
+ ],
1363
+ "sin_cos_embedding_keys": null,
1364
+ "mean_std_embedding_keys": null,
1365
+ "action_configs": null
1366
+ },
1367
+ "state": {
1368
+ "delta_indices": [
1369
+ 0
1370
+ ],
1371
+ "modality_keys": [
1372
+ "end_effector_position",
1373
+ "end_effector_rotation",
1374
+ "gripper_position"
1375
+ ],
1376
+ "sin_cos_embedding_keys": null,
1377
+ "mean_std_embedding_keys": null,
1378
+ "action_configs": null
1379
+ },
1380
+ "action": {
1381
+ "delta_indices": [
1382
+ 0,
1383
+ 1,
1384
+ 2,
1385
+ 3,
1386
+ 4,
1387
+ 5,
1388
+ 6,
1389
+ 7,
1390
+ 8,
1391
+ 9,
1392
+ 10,
1393
+ 11,
1394
+ 12,
1395
+ 13,
1396
+ 14,
1397
+ 15
1398
+ ],
1399
+ "modality_keys": [
1400
+ "end_effector_position",
1401
+ "end_effector_rotation",
1402
+ "gripper_close"
1403
+ ],
1404
+ "sin_cos_embedding_keys": null,
1405
+ "mean_std_embedding_keys": null,
1406
+ "action_configs": [
1407
+ {
1408
+ "rep": "DELTA",
1409
+ "type": "EEF",
1410
+ "format": "DEFAULT",
1411
+ "state_key": null
1412
+ },
1413
+ {
1414
+ "rep": "DELTA",
1415
+ "type": "EEF",
1416
+ "format": "DEFAULT",
1417
+ "state_key": null
1418
+ },
1419
+ {
1420
+ "rep": "ABSOLUTE",
1421
+ "type": "NON_EEF",
1422
+ "format": "DEFAULT",
1423
+ "state_key": null
1424
+ }
1425
+ ]
1426
+ },
1427
+ "language": {
1428
+ "delta_indices": [
1429
+ 0
1430
+ ],
1431
+ "modality_keys": [
1432
+ "annotation.human.action.task_description"
1433
+ ],
1434
+ "sin_cos_embedding_keys": null,
1435
+ "mean_std_embedding_keys": null,
1436
+ "action_configs": null
1437
+ }
1438
+ },
1439
+ "viola": {
1440
+ "video": {
1441
+ "delta_indices": [
1442
+ -6,
1443
+ -4,
1444
+ -2,
1445
+ 0
1446
+ ],
1447
+ "modality_keys": [
1448
+ "primary",
1449
+ "wrist"
1450
+ ],
1451
+ "sin_cos_embedding_keys": null,
1452
+ "mean_std_embedding_keys": null,
1453
+ "action_configs": null
1454
+ },
1455
+ "state": {
1456
+ "delta_indices": [
1457
+ 0
1458
+ ],
1459
+ "modality_keys": [
1460
+ "joint_position",
1461
+ "gripper_position"
1462
+ ],
1463
+ "sin_cos_embedding_keys": null,
1464
+ "mean_std_embedding_keys": null,
1465
+ "action_configs": null
1466
+ },
1467
+ "action": {
1468
+ "delta_indices": [
1469
+ 0,
1470
+ 1,
1471
+ 2,
1472
+ 3,
1473
+ 4,
1474
+ 5,
1475
+ 6,
1476
+ 7,
1477
+ 8,
1478
+ 9,
1479
+ 10,
1480
+ 11,
1481
+ 12,
1482
+ 13,
1483
+ 14,
1484
+ 15
1485
+ ],
1486
+ "modality_keys": [
1487
+ "end_effector_position",
1488
+ "end_effector_rotation",
1489
+ "gripper_close"
1490
+ ],
1491
+ "sin_cos_embedding_keys": null,
1492
+ "mean_std_embedding_keys": null,
1493
+ "action_configs": [
1494
+ {
1495
+ "rep": "DELTA",
1496
+ "type": "EEF",
1497
+ "format": "DEFAULT",
1498
+ "state_key": null
1499
+ },
1500
+ {
1501
+ "rep": "DELTA",
1502
+ "type": "EEF",
1503
+ "format": "DEFAULT",
1504
+ "state_key": null
1505
+ },
1506
+ {
1507
+ "rep": "ABSOLUTE",
1508
+ "type": "NON_EEF",
1509
+ "format": "DEFAULT",
1510
+ "state_key": null
1511
+ }
1512
+ ]
1513
+ },
1514
+ "language": {
1515
+ "delta_indices": [
1516
+ 0
1517
+ ],
1518
+ "modality_keys": [
1519
+ "annotation.human.action.task_description"
1520
+ ],
1521
+ "sin_cos_embedding_keys": null,
1522
+ "mean_std_embedding_keys": null,
1523
+ "action_configs": null
1524
+ }
1525
+ },
1526
+ "humanoid_everyday_h1": {
1527
+ "video": {
1528
+ "delta_indices": [
1529
+ -6,
1530
+ -4,
1531
+ -2,
1532
+ 0
1533
+ ],
1534
+ "modality_keys": [
1535
+ "egocentric_resized"
1536
+ ],
1537
+ "sin_cos_embedding_keys": null,
1538
+ "mean_std_embedding_keys": null,
1539
+ "action_configs": null
1540
+ },
1541
+ "state": {
1542
+ "delta_indices": [
1543
+ 0
1544
+ ],
1545
+ "modality_keys": [
1546
+ "left_arm",
1547
+ "left_hand",
1548
+ "right_arm",
1549
+ "right_hand"
1550
+ ],
1551
+ "sin_cos_embedding_keys": null,
1552
+ "mean_std_embedding_keys": null,
1553
+ "action_configs": null
1554
+ },
1555
+ "action": {
1556
+ "delta_indices": [
1557
+ 0,
1558
+ 1,
1559
+ 2,
1560
+ 3,
1561
+ 4,
1562
+ 5,
1563
+ 6,
1564
+ 7,
1565
+ 8,
1566
+ 9,
1567
+ 10,
1568
+ 11,
1569
+ 12,
1570
+ 13,
1571
+ 14,
1572
+ 15
1573
+ ],
1574
+ "modality_keys": [
1575
+ "left_arm",
1576
+ "left_hand",
1577
+ "right_arm",
1578
+ "right_hand"
1579
+ ],
1580
+ "sin_cos_embedding_keys": null,
1581
+ "mean_std_embedding_keys": null,
1582
+ "action_configs": [
1583
+ {
1584
+ "rep": "ABSOLUTE",
1585
+ "type": "NON_EEF",
1586
+ "format": "DEFAULT",
1587
+ "state_key": null
1588
+ },
1589
+ {
1590
+ "rep": "ABSOLUTE",
1591
+ "type": "NON_EEF",
1592
+ "format": "DEFAULT",
1593
+ "state_key": null
1594
+ },
1595
+ {
1596
+ "rep": "ABSOLUTE",
1597
+ "type": "NON_EEF",
1598
+ "format": "DEFAULT",
1599
+ "state_key": null
1600
+ },
1601
+ {
1602
+ "rep": "ABSOLUTE",
1603
+ "type": "NON_EEF",
1604
+ "format": "DEFAULT",
1605
+ "state_key": null
1606
+ }
1607
+ ]
1608
+ },
1609
+ "language": {
1610
+ "delta_indices": [
1611
+ 0
1612
+ ],
1613
+ "modality_keys": [
1614
+ "annotation.human.action.task_description"
1615
+ ],
1616
+ "sin_cos_embedding_keys": null,
1617
+ "mean_std_embedding_keys": null,
1618
+ "action_configs": null
1619
+ }
1620
+ },
1621
+ "austin_buds_dataset_converted_externally_to_rlds": {
1622
+ "video": {
1623
+ "delta_indices": [
1624
+ -6,
1625
+ -4,
1626
+ -2,
1627
+ 0
1628
+ ],
1629
+ "modality_keys": [
1630
+ "primary",
1631
+ "wrist"
1632
+ ],
1633
+ "sin_cos_embedding_keys": null,
1634
+ "mean_std_embedding_keys": null,
1635
+ "action_configs": null
1636
+ },
1637
+ "state": {
1638
+ "delta_indices": [
1639
+ 0
1640
+ ],
1641
+ "modality_keys": [
1642
+ "joint_position",
1643
+ "gripper_position"
1644
+ ],
1645
+ "sin_cos_embedding_keys": null,
1646
+ "mean_std_embedding_keys": null,
1647
+ "action_configs": null
1648
+ },
1649
+ "action": {
1650
+ "delta_indices": [
1651
+ 0,
1652
+ 1,
1653
+ 2,
1654
+ 3,
1655
+ 4,
1656
+ 5,
1657
+ 6,
1658
+ 7,
1659
+ 8,
1660
+ 9,
1661
+ 10,
1662
+ 11,
1663
+ 12,
1664
+ 13,
1665
+ 14,
1666
+ 15
1667
+ ],
1668
+ "modality_keys": [
1669
+ "end_effector_position",
1670
+ "end_effector_rotation",
1671
+ "gripper_close"
1672
+ ],
1673
+ "sin_cos_embedding_keys": null,
1674
+ "mean_std_embedding_keys": null,
1675
+ "action_configs": [
1676
+ {
1677
+ "rep": "DELTA",
1678
+ "type": "EEF",
1679
+ "format": "DEFAULT",
1680
+ "state_key": null
1681
+ },
1682
+ {
1683
+ "rep": "DELTA",
1684
+ "type": "EEF",
1685
+ "format": "DEFAULT",
1686
+ "state_key": null
1687
+ },
1688
+ {
1689
+ "rep": "ABSOLUTE",
1690
+ "type": "NON_EEF",
1691
+ "format": "DEFAULT",
1692
+ "state_key": null
1693
+ }
1694
+ ]
1695
+ },
1696
+ "language": {
1697
+ "delta_indices": [
1698
+ 0
1699
+ ],
1700
+ "modality_keys": [
1701
+ "annotation.human.action.task_description"
1702
+ ],
1703
+ "sin_cos_embedding_keys": null,
1704
+ "mean_std_embedding_keys": null,
1705
+ "action_configs": null
1706
+ }
1707
+ },
1708
+ "taco_play": {
1709
+ "video": {
1710
+ "delta_indices": [
1711
+ -6,
1712
+ -4,
1713
+ -2,
1714
+ 0
1715
+ ],
1716
+ "modality_keys": [
1717
+ "primary",
1718
+ "wrist"
1719
+ ],
1720
+ "sin_cos_embedding_keys": null,
1721
+ "mean_std_embedding_keys": null,
1722
+ "action_configs": null
1723
+ },
1724
+ "state": {
1725
+ "delta_indices": [
1726
+ 0
1727
+ ],
1728
+ "modality_keys": [
1729
+ "end_effector_position",
1730
+ "end_effector_rotation",
1731
+ "gripper_position"
1732
+ ],
1733
+ "sin_cos_embedding_keys": null,
1734
+ "mean_std_embedding_keys": null,
1735
+ "action_configs": null
1736
+ },
1737
+ "action": {
1738
+ "delta_indices": [
1739
+ 0,
1740
+ 1,
1741
+ 2,
1742
+ 3,
1743
+ 4,
1744
+ 5,
1745
+ 6,
1746
+ 7,
1747
+ 8,
1748
+ 9,
1749
+ 10,
1750
+ 11,
1751
+ 12,
1752
+ 13,
1753
+ 14,
1754
+ 15
1755
+ ],
1756
+ "modality_keys": [
1757
+ "end_effector_position",
1758
+ "end_effector_rotation",
1759
+ "gripper_close"
1760
+ ],
1761
+ "sin_cos_embedding_keys": null,
1762
+ "mean_std_embedding_keys": null,
1763
+ "action_configs": [
1764
+ {
1765
+ "rep": "DELTA",
1766
+ "type": "EEF",
1767
+ "format": "DEFAULT",
1768
+ "state_key": null
1769
+ },
1770
+ {
1771
+ "rep": "DELTA",
1772
+ "type": "EEF",
1773
+ "format": "DEFAULT",
1774
+ "state_key": null
1775
+ },
1776
+ {
1777
+ "rep": "ABSOLUTE",
1778
+ "type": "NON_EEF",
1779
+ "format": "DEFAULT",
1780
+ "state_key": null
1781
+ }
1782
+ ]
1783
+ },
1784
+ "language": {
1785
+ "delta_indices": [
1786
+ 0
1787
+ ],
1788
+ "modality_keys": [
1789
+ "annotation.human.action.task_description"
1790
+ ],
1791
+ "sin_cos_embedding_keys": null,
1792
+ "mean_std_embedding_keys": null,
1793
+ "action_configs": null
1794
+ }
1795
+ },
1796
+ "toto": {
1797
+ "video": {
1798
+ "delta_indices": [
1799
+ -6,
1800
+ -4,
1801
+ -2,
1802
+ 0
1803
+ ],
1804
+ "modality_keys": [
1805
+ "primary"
1806
+ ],
1807
+ "sin_cos_embedding_keys": null,
1808
+ "mean_std_embedding_keys": null,
1809
+ "action_configs": null
1810
+ },
1811
+ "state": {
1812
+ "delta_indices": [
1813
+ 0
1814
+ ],
1815
+ "modality_keys": [
1816
+ "joint_position",
1817
+ "gripper_position"
1818
+ ],
1819
+ "sin_cos_embedding_keys": null,
1820
+ "mean_std_embedding_keys": null,
1821
+ "action_configs": null
1822
+ },
1823
+ "action": {
1824
+ "delta_indices": [
1825
+ 0,
1826
+ 1,
1827
+ 2,
1828
+ 3,
1829
+ 4,
1830
+ 5,
1831
+ 6,
1832
+ 7,
1833
+ 8,
1834
+ 9,
1835
+ 10,
1836
+ 11,
1837
+ 12,
1838
+ 13,
1839
+ 14,
1840
+ 15
1841
+ ],
1842
+ "modality_keys": [
1843
+ "end_effector_position",
1844
+ "end_effector_rotation",
1845
+ "gripper_close"
1846
+ ],
1847
+ "sin_cos_embedding_keys": null,
1848
+ "mean_std_embedding_keys": null,
1849
+ "action_configs": [
1850
+ {
1851
+ "rep": "DELTA",
1852
+ "type": "EEF",
1853
+ "format": "DEFAULT",
1854
+ "state_key": null
1855
+ },
1856
+ {
1857
+ "rep": "DELTA",
1858
+ "type": "EEF",
1859
+ "format": "DEFAULT",
1860
+ "state_key": null
1861
+ },
1862
+ {
1863
+ "rep": "ABSOLUTE",
1864
+ "type": "NON_EEF",
1865
+ "format": "DEFAULT",
1866
+ "state_key": null
1867
+ }
1868
+ ]
1869
+ },
1870
+ "language": {
1871
+ "delta_indices": [
1872
+ 0
1873
+ ],
1874
+ "modality_keys": [
1875
+ "annotation.human.action.task_description"
1876
+ ],
1877
+ "sin_cos_embedding_keys": null,
1878
+ "mean_std_embedding_keys": null,
1879
+ "action_configs": null
1880
+ }
1881
+ },
1882
+ "language_table": {
1883
+ "video": {
1884
+ "delta_indices": [
1885
+ -6,
1886
+ -4,
1887
+ -2,
1888
+ 0
1889
+ ],
1890
+ "modality_keys": [
1891
+ "primary"
1892
+ ],
1893
+ "sin_cos_embedding_keys": null,
1894
+ "mean_std_embedding_keys": null,
1895
+ "action_configs": null
1896
+ },
1897
+ "state": {
1898
+ "delta_indices": [
1899
+ 0
1900
+ ],
1901
+ "modality_keys": [
1902
+ "end_effector_position"
1903
+ ],
1904
+ "sin_cos_embedding_keys": null,
1905
+ "mean_std_embedding_keys": null,
1906
+ "action_configs": null
1907
+ },
1908
+ "action": {
1909
+ "delta_indices": [
1910
+ 0,
1911
+ 1,
1912
+ 2,
1913
+ 3,
1914
+ 4,
1915
+ 5,
1916
+ 6,
1917
+ 7,
1918
+ 8,
1919
+ 9,
1920
+ 10,
1921
+ 11,
1922
+ 12,
1923
+ 13,
1924
+ 14,
1925
+ 15
1926
+ ],
1927
+ "modality_keys": [
1928
+ "end_effector_position"
1929
+ ],
1930
+ "sin_cos_embedding_keys": null,
1931
+ "mean_std_embedding_keys": null,
1932
+ "action_configs": [
1933
+ {
1934
+ "rep": "DELTA",
1935
+ "type": "EEF",
1936
+ "format": "DEFAULT",
1937
+ "state_key": null
1938
+ }
1939
+ ]
1940
+ },
1941
+ "language": {
1942
+ "delta_indices": [
1943
+ 0
1944
+ ],
1945
+ "modality_keys": [
1946
+ "annotation.human.action.task_description"
1947
+ ],
1948
+ "sin_cos_embedding_keys": null,
1949
+ "mean_std_embedding_keys": null,
1950
+ "action_configs": null
1951
+ }
1952
+ },
1953
+ "nyu_franka_play_dataset_converted_externally_to_rlds": {
1954
+ "video": {
1955
+ "delta_indices": [
1956
+ -6,
1957
+ -4,
1958
+ -2,
1959
+ 0
1960
+ ],
1961
+ "modality_keys": [
1962
+ "primary",
1963
+ "secondary"
1964
+ ],
1965
+ "sin_cos_embedding_keys": null,
1966
+ "mean_std_embedding_keys": null,
1967
+ "action_configs": null
1968
+ },
1969
+ "state": {
1970
+ "delta_indices": [
1971
+ 0
1972
+ ],
1973
+ "modality_keys": [
1974
+ "end_effector_position",
1975
+ "end_effector_rotation",
1976
+ "gripper_position"
1977
+ ],
1978
+ "sin_cos_embedding_keys": null,
1979
+ "mean_std_embedding_keys": null,
1980
+ "action_configs": null
1981
+ },
1982
+ "action": {
1983
+ "delta_indices": [
1984
+ 0,
1985
+ 1,
1986
+ 2,
1987
+ 3,
1988
+ 4,
1989
+ 5,
1990
+ 6,
1991
+ 7,
1992
+ 8,
1993
+ 9,
1994
+ 10,
1995
+ 11,
1996
+ 12,
1997
+ 13,
1998
+ 14,
1999
+ 15
2000
+ ],
2001
+ "modality_keys": [
2002
+ "end_effector_position",
2003
+ "end_effector_rotation",
2004
+ "gripper_close"
2005
+ ],
2006
+ "sin_cos_embedding_keys": null,
2007
+ "mean_std_embedding_keys": null,
2008
+ "action_configs": [
2009
+ {
2010
+ "rep": "DELTA",
2011
+ "type": "EEF",
2012
+ "format": "DEFAULT",
2013
+ "state_key": null
2014
+ },
2015
+ {
2016
+ "rep": "DELTA",
2017
+ "type": "EEF",
2018
+ "format": "DEFAULT",
2019
+ "state_key": null
2020
+ },
2021
+ {
2022
+ "rep": "ABSOLUTE",
2023
+ "type": "NON_EEF",
2024
+ "format": "DEFAULT",
2025
+ "state_key": null
2026
+ }
2027
+ ]
2028
+ },
2029
+ "language": {
2030
+ "delta_indices": [
2031
+ 0
2032
+ ],
2033
+ "modality_keys": [
2034
+ "annotation.human.action.task_description"
2035
+ ],
2036
+ "sin_cos_embedding_keys": null,
2037
+ "mean_std_embedding_keys": null,
2038
+ "action_configs": null
2039
+ }
2040
+ },
2041
+ "ucsd_kitchen_dataset_converted_externally_to_rlds": {
2042
+ "video": {
2043
+ "delta_indices": [
2044
+ -6,
2045
+ -4,
2046
+ -2,
2047
+ 0
2048
+ ],
2049
+ "modality_keys": [
2050
+ "primary"
2051
+ ],
2052
+ "sin_cos_embedding_keys": null,
2053
+ "mean_std_embedding_keys": null,
2054
+ "action_configs": null
2055
+ },
2056
+ "state": {
2057
+ "delta_indices": [
2058
+ 0
2059
+ ],
2060
+ "modality_keys": [
2061
+ "joint_position"
2062
+ ],
2063
+ "sin_cos_embedding_keys": null,
2064
+ "mean_std_embedding_keys": null,
2065
+ "action_configs": null
2066
+ },
2067
+ "action": {
2068
+ "delta_indices": [
2069
+ 0,
2070
+ 1,
2071
+ 2,
2072
+ 3,
2073
+ 4,
2074
+ 5,
2075
+ 6,
2076
+ 7,
2077
+ 8,
2078
+ 9,
2079
+ 10,
2080
+ 11,
2081
+ 12,
2082
+ 13,
2083
+ 14,
2084
+ 15
2085
+ ],
2086
+ "modality_keys": [
2087
+ "end_effector_position",
2088
+ "end_effector_rotation",
2089
+ "gripper_close"
2090
+ ],
2091
+ "sin_cos_embedding_keys": null,
2092
+ "mean_std_embedding_keys": null,
2093
+ "action_configs": [
2094
+ {
2095
+ "rep": "DELTA",
2096
+ "type": "EEF",
2097
+ "format": "DEFAULT",
2098
+ "state_key": null
2099
+ },
2100
+ {
2101
+ "rep": "DELTA",
2102
+ "type": "EEF",
2103
+ "format": "DEFAULT",
2104
+ "state_key": null
2105
+ },
2106
+ {
2107
+ "rep": "ABSOLUTE",
2108
+ "type": "NON_EEF",
2109
+ "format": "DEFAULT",
2110
+ "state_key": null
2111
+ }
2112
+ ]
2113
+ },
2114
+ "language": {
2115
+ "delta_indices": [
2116
+ 0
2117
+ ],
2118
+ "modality_keys": [
2119
+ "annotation.human.action.task_description"
2120
+ ],
2121
+ "sin_cos_embedding_keys": null,
2122
+ "mean_std_embedding_keys": null,
2123
+ "action_configs": null
2124
+ }
2125
+ },
2126
+ "austin_sirius_dataset_converted_externally_to_rlds": {
2127
+ "video": {
2128
+ "delta_indices": [
2129
+ -6,
2130
+ -4,
2131
+ -2,
2132
+ 0
2133
+ ],
2134
+ "modality_keys": [
2135
+ "primary",
2136
+ "wrist"
2137
+ ],
2138
+ "sin_cos_embedding_keys": null,
2139
+ "mean_std_embedding_keys": null,
2140
+ "action_configs": null
2141
+ },
2142
+ "state": {
2143
+ "delta_indices": [
2144
+ 0
2145
+ ],
2146
+ "modality_keys": [
2147
+ "end_effector_position",
2148
+ "end_effector_rotation",
2149
+ "gripper_position"
2150
+ ],
2151
+ "sin_cos_embedding_keys": null,
2152
+ "mean_std_embedding_keys": null,
2153
+ "action_configs": null
2154
+ },
2155
+ "action": {
2156
+ "delta_indices": [
2157
+ 0,
2158
+ 1,
2159
+ 2,
2160
+ 3,
2161
+ 4,
2162
+ 5,
2163
+ 6,
2164
+ 7,
2165
+ 8,
2166
+ 9,
2167
+ 10,
2168
+ 11,
2169
+ 12,
2170
+ 13,
2171
+ 14,
2172
+ 15
2173
+ ],
2174
+ "modality_keys": [
2175
+ "end_effector_position",
2176
+ "end_effector_rotation",
2177
+ "gripper_close"
2178
+ ],
2179
+ "sin_cos_embedding_keys": null,
2180
+ "mean_std_embedding_keys": null,
2181
+ "action_configs": [
2182
+ {
2183
+ "rep": "DELTA",
2184
+ "type": "EEF",
2185
+ "format": "DEFAULT",
2186
+ "state_key": null
2187
+ },
2188
+ {
2189
+ "rep": "DELTA",
2190
+ "type": "EEF",
2191
+ "format": "DEFAULT",
2192
+ "state_key": null
2193
+ },
2194
+ {
2195
+ "rep": "ABSOLUTE",
2196
+ "type": "NON_EEF",
2197
+ "format": "DEFAULT",
2198
+ "state_key": null
2199
+ }
2200
+ ]
2201
+ },
2202
+ "language": {
2203
+ "delta_indices": [
2204
+ 0
2205
+ ],
2206
+ "modality_keys": [
2207
+ "annotation.human.action.task_description"
2208
+ ],
2209
+ "sin_cos_embedding_keys": null,
2210
+ "mean_std_embedding_keys": null,
2211
+ "action_configs": null
2212
+ }
2213
+ },
2214
+ "droid": {
2215
+ "video": {
2216
+ "delta_indices": [
2217
+ -54,
2218
+ -52,
2219
+ -50,
2220
+ -48,
2221
+ -38,
2222
+ -36,
2223
+ -34,
2224
+ -32,
2225
+ -22,
2226
+ -20,
2227
+ -18,
2228
+ -16,
2229
+ -6,
2230
+ -4,
2231
+ -2,
2232
+ 0
2233
+ ],
2234
+ "modality_keys": [
2235
+ "primary",
2236
+ "wrist"
2237
+ ],
2238
+ "sin_cos_embedding_keys": null,
2239
+ "mean_std_embedding_keys": null,
2240
+ "action_configs": null
2241
+ },
2242
+ "state": {
2243
+ "delta_indices": [
2244
+ 0
2245
+ ],
2246
+ "modality_keys": [
2247
+ "end_effector_position",
2248
+ "end_effector_rotation",
2249
+ "gripper_position"
2250
+ ],
2251
+ "sin_cos_embedding_keys": null,
2252
+ "mean_std_embedding_keys": null,
2253
+ "action_configs": null
2254
+ },
2255
+ "action": {
2256
+ "delta_indices": [
2257
+ 0,
2258
+ 1,
2259
+ 2,
2260
+ 3,
2261
+ 4,
2262
+ 5,
2263
+ 6,
2264
+ 7,
2265
+ 8,
2266
+ 9,
2267
+ 10,
2268
+ 11,
2269
+ 12,
2270
+ 13,
2271
+ 14,
2272
+ 15
2273
+ ],
2274
+ "modality_keys": [
2275
+ "end_effector_position",
2276
+ "end_effector_rotation",
2277
+ "gripper_close"
2278
+ ],
2279
+ "sin_cos_embedding_keys": null,
2280
+ "mean_std_embedding_keys": null,
2281
+ "action_configs": [
2282
+ {
2283
+ "rep": "DELTA",
2284
+ "type": "EEF",
2285
+ "format": "DEFAULT",
2286
+ "state_key": null
2287
+ },
2288
+ {
2289
+ "rep": "DELTA",
2290
+ "type": "EEF",
2291
+ "format": "DEFAULT",
2292
+ "state_key": null
2293
+ },
2294
+ {
2295
+ "rep": "ABSOLUTE",
2296
+ "type": "NON_EEF",
2297
+ "format": "DEFAULT",
2298
+ "state_key": null
2299
+ }
2300
+ ]
2301
+ },
2302
+ "language": {
2303
+ "delta_indices": [
2304
+ 0
2305
+ ],
2306
+ "modality_keys": [
2307
+ "annotation.human.action.task_description"
2308
+ ],
2309
+ "sin_cos_embedding_keys": null,
2310
+ "mean_std_embedding_keys": null,
2311
+ "action_configs": null
2312
+ },
2313
+ "tactile": {
2314
+ "delta_indices": [
2315
+ 0,
2316
+ 1,
2317
+ 2,
2318
+ 3,
2319
+ 4,
2320
+ 5,
2321
+ 6,
2322
+ 7,
2323
+ 8,
2324
+ 9,
2325
+ 10,
2326
+ 11,
2327
+ 12,
2328
+ 13,
2329
+ 14,
2330
+ 15,
2331
+ 16
2332
+ ],
2333
+ "modality_keys": [
2334
+ "left"
2335
+ ],
2336
+ "sin_cos_embedding_keys": null,
2337
+ "mean_std_embedding_keys": null,
2338
+ "action_configs": null
2339
+ },
2340
+ "torque": {
2341
+ "delta_indices": [
2342
+ 0,
2343
+ 1,
2344
+ 2,
2345
+ 3,
2346
+ 4,
2347
+ 5,
2348
+ 6,
2349
+ 7,
2350
+ 8,
2351
+ 9,
2352
+ 10,
2353
+ 11,
2354
+ 12,
2355
+ 13,
2356
+ 14,
2357
+ 15,
2358
+ 16
2359
+ ],
2360
+ "modality_keys": [
2361
+ "torque"
2362
+ ],
2363
+ "sin_cos_embedding_keys": null,
2364
+ "mean_std_embedding_keys": null,
2365
+ "action_configs": null
2366
+ }
2367
+ },
2368
+ "bc_z": {
2369
+ "video": {
2370
+ "delta_indices": [
2371
+ -6,
2372
+ -4,
2373
+ -2,
2374
+ 0
2375
+ ],
2376
+ "modality_keys": [
2377
+ "primary"
2378
+ ],
2379
+ "sin_cos_embedding_keys": null,
2380
+ "mean_std_embedding_keys": null,
2381
+ "action_configs": null
2382
+ },
2383
+ "state": {
2384
+ "delta_indices": [
2385
+ 0
2386
+ ],
2387
+ "modality_keys": [
2388
+ "end_effector_position",
2389
+ "end_effector_rotation",
2390
+ "gripper_position"
2391
+ ],
2392
+ "sin_cos_embedding_keys": null,
2393
+ "mean_std_embedding_keys": null,
2394
+ "action_configs": null
2395
+ },
2396
+ "action": {
2397
+ "delta_indices": [
2398
+ 0,
2399
+ 1,
2400
+ 2,
2401
+ 3,
2402
+ 4,
2403
+ 5,
2404
+ 6,
2405
+ 7,
2406
+ 8,
2407
+ 9,
2408
+ 10,
2409
+ 11,
2410
+ 12,
2411
+ 13,
2412
+ 14,
2413
+ 15
2414
+ ],
2415
+ "modality_keys": [
2416
+ "end_effector_position",
2417
+ "end_effector_rotation",
2418
+ "gripper_close"
2419
+ ],
2420
+ "sin_cos_embedding_keys": null,
2421
+ "mean_std_embedding_keys": null,
2422
+ "action_configs": [
2423
+ {
2424
+ "rep": "DELTA",
2425
+ "type": "EEF",
2426
+ "format": "DEFAULT",
2427
+ "state_key": null
2428
+ },
2429
+ {
2430
+ "rep": "DELTA",
2431
+ "type": "EEF",
2432
+ "format": "DEFAULT",
2433
+ "state_key": null
2434
+ },
2435
+ {
2436
+ "rep": "ABSOLUTE",
2437
+ "type": "NON_EEF",
2438
+ "format": "DEFAULT",
2439
+ "state_key": null
2440
+ }
2441
+ ]
2442
+ },
2443
+ "language": {
2444
+ "delta_indices": [
2445
+ 0
2446
+ ],
2447
+ "modality_keys": [
2448
+ "annotation.human.action.task_description"
2449
+ ],
2450
+ "sin_cos_embedding_keys": null,
2451
+ "mean_std_embedding_keys": null,
2452
+ "action_configs": null
2453
+ }
2454
+ },
2455
+ "kuka": {
2456
+ "video": {
2457
+ "delta_indices": [
2458
+ -6,
2459
+ -4,
2460
+ -2,
2461
+ 0
2462
+ ],
2463
+ "modality_keys": [
2464
+ "primary"
2465
+ ],
2466
+ "sin_cos_embedding_keys": null,
2467
+ "mean_std_embedding_keys": null,
2468
+ "action_configs": null
2469
+ },
2470
+ "state": {
2471
+ "delta_indices": [
2472
+ 0
2473
+ ],
2474
+ "modality_keys": [
2475
+ "end_effector_position",
2476
+ "end_effector_rotation",
2477
+ "gripper_position"
2478
+ ],
2479
+ "sin_cos_embedding_keys": null,
2480
+ "mean_std_embedding_keys": null,
2481
+ "action_configs": null
2482
+ },
2483
+ "action": {
2484
+ "delta_indices": [
2485
+ 0,
2486
+ 1,
2487
+ 2,
2488
+ 3,
2489
+ 4,
2490
+ 5,
2491
+ 6,
2492
+ 7,
2493
+ 8,
2494
+ 9,
2495
+ 10,
2496
+ 11,
2497
+ 12,
2498
+ 13,
2499
+ 14,
2500
+ 15
2501
+ ],
2502
+ "modality_keys": [
2503
+ "end_effector_position",
2504
+ "end_effector_rotation",
2505
+ "gripper_close"
2506
+ ],
2507
+ "sin_cos_embedding_keys": null,
2508
+ "mean_std_embedding_keys": null,
2509
+ "action_configs": [
2510
+ {
2511
+ "rep": "DELTA",
2512
+ "type": "EEF",
2513
+ "format": "DEFAULT",
2514
+ "state_key": null
2515
+ },
2516
+ {
2517
+ "rep": "DELTA",
2518
+ "type": "EEF",
2519
+ "format": "DEFAULT",
2520
+ "state_key": null
2521
+ },
2522
+ {
2523
+ "rep": "ABSOLUTE",
2524
+ "type": "NON_EEF",
2525
+ "format": "DEFAULT",
2526
+ "state_key": null
2527
+ }
2528
+ ]
2529
+ },
2530
+ "language": {
2531
+ "delta_indices": [
2532
+ 0
2533
+ ],
2534
+ "modality_keys": [
2535
+ "annotation.human.action.task_description"
2536
+ ],
2537
+ "sin_cos_embedding_keys": null,
2538
+ "mean_std_embedding_keys": null,
2539
+ "action_configs": null
2540
+ }
2541
+ },
2542
+ "agibot_dexhand": {
2543
+ "video": {
2544
+ "delta_indices": [
2545
+ -6,
2546
+ -4,
2547
+ -2,
2548
+ 0
2549
+ ],
2550
+ "modality_keys": [
2551
+ "primary"
2552
+ ],
2553
+ "sin_cos_embedding_keys": null,
2554
+ "mean_std_embedding_keys": null,
2555
+ "action_configs": null
2556
+ },
2557
+ "state": {
2558
+ "delta_indices": [
2559
+ 0
2560
+ ],
2561
+ "modality_keys": [
2562
+ "state"
2563
+ ],
2564
+ "sin_cos_embedding_keys": null,
2565
+ "mean_std_embedding_keys": null,
2566
+ "action_configs": null
2567
+ },
2568
+ "action": {
2569
+ "delta_indices": [
2570
+ 0,
2571
+ 1,
2572
+ 2,
2573
+ 3,
2574
+ 4,
2575
+ 5,
2576
+ 6,
2577
+ 7,
2578
+ 8,
2579
+ 9,
2580
+ 10,
2581
+ 11,
2582
+ 12,
2583
+ 13,
2584
+ 14,
2585
+ 15
2586
+ ],
2587
+ "modality_keys": [
2588
+ "action"
2589
+ ],
2590
+ "sin_cos_embedding_keys": null,
2591
+ "mean_std_embedding_keys": null,
2592
+ "action_configs": [
2593
+ {
2594
+ "rep": "ABSOLUTE",
2595
+ "type": "NON_EEF",
2596
+ "format": "DEFAULT",
2597
+ "state_key": null
2598
+ }
2599
+ ]
2600
+ },
2601
+ "language": {
2602
+ "delta_indices": [
2603
+ 0
2604
+ ],
2605
+ "modality_keys": [
2606
+ "annotation.human.action.task_description"
2607
+ ],
2608
+ "sin_cos_embedding_keys": null,
2609
+ "mean_std_embedding_keys": null,
2610
+ "action_configs": null
2611
+ }
2612
+ },
2613
+ "action_net": {
2614
+ "video": {
2615
+ "delta_indices": [
2616
+ -6,
2617
+ -4,
2618
+ -2,
2619
+ 0
2620
+ ],
2621
+ "modality_keys": [
2622
+ "primary"
2623
+ ],
2624
+ "sin_cos_embedding_keys": null,
2625
+ "mean_std_embedding_keys": null,
2626
+ "action_configs": null
2627
+ },
2628
+ "state": {
2629
+ "delta_indices": [
2630
+ 0
2631
+ ],
2632
+ "modality_keys": [
2633
+ "state"
2634
+ ],
2635
+ "sin_cos_embedding_keys": null,
2636
+ "mean_std_embedding_keys": null,
2637
+ "action_configs": null
2638
+ },
2639
+ "action": {
2640
+ "delta_indices": [
2641
+ 0,
2642
+ 1,
2643
+ 2,
2644
+ 3,
2645
+ 4,
2646
+ 5,
2647
+ 6,
2648
+ 7,
2649
+ 8,
2650
+ 9,
2651
+ 10,
2652
+ 11,
2653
+ 12,
2654
+ 13,
2655
+ 14,
2656
+ 15
2657
+ ],
2658
+ "modality_keys": [
2659
+ "action"
2660
+ ],
2661
+ "sin_cos_embedding_keys": null,
2662
+ "mean_std_embedding_keys": null,
2663
+ "action_configs": [
2664
+ {
2665
+ "rep": "ABSOLUTE",
2666
+ "type": "NON_EEF",
2667
+ "format": "DEFAULT",
2668
+ "state_key": null
2669
+ }
2670
+ ]
2671
+ },
2672
+ "language": {
2673
+ "delta_indices": [
2674
+ 0
2675
+ ],
2676
+ "modality_keys": [
2677
+ "annotation.human.action.task_description"
2678
+ ],
2679
+ "sin_cos_embedding_keys": null,
2680
+ "mean_std_embedding_keys": null,
2681
+ "action_configs": null
2682
+ }
2683
+ },
2684
+ "galaxea": {
2685
+ "video": {
2686
+ "delta_indices": [
2687
+ -6,
2688
+ -4,
2689
+ -2,
2690
+ 0
2691
+ ],
2692
+ "modality_keys": [
2693
+ "primary",
2694
+ "wrist_left",
2695
+ "wrist_right"
2696
+ ],
2697
+ "sin_cos_embedding_keys": null,
2698
+ "mean_std_embedding_keys": null,
2699
+ "action_configs": null
2700
+ },
2701
+ "state": {
2702
+ "delta_indices": [
2703
+ 0
2704
+ ],
2705
+ "modality_keys": [
2706
+ "state"
2707
+ ],
2708
+ "sin_cos_embedding_keys": null,
2709
+ "mean_std_embedding_keys": null,
2710
+ "action_configs": null
2711
+ },
2712
+ "action": {
2713
+ "delta_indices": [
2714
+ 0,
2715
+ 1,
2716
+ 2,
2717
+ 3,
2718
+ 4,
2719
+ 5,
2720
+ 6,
2721
+ 7,
2722
+ 8,
2723
+ 9,
2724
+ 10,
2725
+ 11,
2726
+ 12,
2727
+ 13,
2728
+ 14,
2729
+ 15
2730
+ ],
2731
+ "modality_keys": [
2732
+ "action"
2733
+ ],
2734
+ "sin_cos_embedding_keys": null,
2735
+ "mean_std_embedding_keys": null,
2736
+ "action_configs": [
2737
+ {
2738
+ "rep": "ABSOLUTE",
2739
+ "type": "NON_EEF",
2740
+ "format": "DEFAULT",
2741
+ "state_key": null
2742
+ }
2743
+ ]
2744
+ },
2745
+ "language": {
2746
+ "delta_indices": [
2747
+ 0
2748
+ ],
2749
+ "modality_keys": [
2750
+ "annotation.human.action.task_description"
2751
+ ],
2752
+ "sin_cos_embedding_keys": null,
2753
+ "mean_std_embedding_keys": null,
2754
+ "action_configs": null
2755
+ }
2756
+ },
2757
+ "roboturk": {
2758
+ "video": {
2759
+ "delta_indices": [
2760
+ -6,
2761
+ -4,
2762
+ -2,
2763
+ 0
2764
+ ],
2765
+ "modality_keys": [
2766
+ "primary"
2767
+ ],
2768
+ "sin_cos_embedding_keys": null,
2769
+ "mean_std_embedding_keys": null,
2770
+ "action_configs": null
2771
+ },
2772
+ "state": {
2773
+ "delta_indices": [
2774
+ 0
2775
+ ],
2776
+ "modality_keys": [
2777
+ "none"
2778
+ ],
2779
+ "sin_cos_embedding_keys": null,
2780
+ "mean_std_embedding_keys": null,
2781
+ "action_configs": null
2782
+ },
2783
+ "action": {
2784
+ "delta_indices": [
2785
+ 0,
2786
+ 1,
2787
+ 2,
2788
+ 3,
2789
+ 4,
2790
+ 5,
2791
+ 6,
2792
+ 7,
2793
+ 8,
2794
+ 9,
2795
+ 10,
2796
+ 11,
2797
+ 12,
2798
+ 13,
2799
+ 14,
2800
+ 15
2801
+ ],
2802
+ "modality_keys": [
2803
+ "end_effector_position",
2804
+ "end_effector_rotation",
2805
+ "gripper_close"
2806
+ ],
2807
+ "sin_cos_embedding_keys": null,
2808
+ "mean_std_embedding_keys": null,
2809
+ "action_configs": [
2810
+ {
2811
+ "rep": "DELTA",
2812
+ "type": "EEF",
2813
+ "format": "DEFAULT",
2814
+ "state_key": null
2815
+ },
2816
+ {
2817
+ "rep": "DELTA",
2818
+ "type": "EEF",
2819
+ "format": "DEFAULT",
2820
+ "state_key": null
2821
+ },
2822
+ {
2823
+ "rep": "ABSOLUTE",
2824
+ "type": "NON_EEF",
2825
+ "format": "DEFAULT",
2826
+ "state_key": null
2827
+ }
2828
+ ]
2829
+ },
2830
+ "language": {
2831
+ "delta_indices": [
2832
+ 0
2833
+ ],
2834
+ "modality_keys": [
2835
+ "annotation.human.action.task_description"
2836
+ ],
2837
+ "sin_cos_embedding_keys": null,
2838
+ "mean_std_embedding_keys": null,
2839
+ "action_configs": null
2840
+ }
2841
+ },
2842
+ "berkeley_fanuc_manipulation": {
2843
+ "video": {
2844
+ "delta_indices": [
2845
+ -6,
2846
+ -4,
2847
+ -2,
2848
+ 0
2849
+ ],
2850
+ "modality_keys": [
2851
+ "primary",
2852
+ "wrist"
2853
+ ],
2854
+ "sin_cos_embedding_keys": null,
2855
+ "mean_std_embedding_keys": null,
2856
+ "action_configs": null
2857
+ },
2858
+ "state": {
2859
+ "delta_indices": [
2860
+ 0
2861
+ ],
2862
+ "modality_keys": [
2863
+ "joint_position",
2864
+ "gripper_position"
2865
+ ],
2866
+ "sin_cos_embedding_keys": null,
2867
+ "mean_std_embedding_keys": null,
2868
+ "action_configs": null
2869
+ },
2870
+ "action": {
2871
+ "delta_indices": [
2872
+ 0,
2873
+ 1,
2874
+ 2,
2875
+ 3,
2876
+ 4,
2877
+ 5,
2878
+ 6,
2879
+ 7,
2880
+ 8,
2881
+ 9,
2882
+ 10,
2883
+ 11,
2884
+ 12,
2885
+ 13,
2886
+ 14,
2887
+ 15
2888
+ ],
2889
+ "modality_keys": [
2890
+ "end_effector_position",
2891
+ "end_effector_rotation",
2892
+ "gripper_close"
2893
+ ],
2894
+ "sin_cos_embedding_keys": null,
2895
+ "mean_std_embedding_keys": null,
2896
+ "action_configs": [
2897
+ {
2898
+ "rep": "DELTA",
2899
+ "type": "EEF",
2900
+ "format": "DEFAULT",
2901
+ "state_key": null
2902
+ },
2903
+ {
2904
+ "rep": "DELTA",
2905
+ "type": "EEF",
2906
+ "format": "DEFAULT",
2907
+ "state_key": null
2908
+ },
2909
+ {
2910
+ "rep": "ABSOLUTE",
2911
+ "type": "NON_EEF",
2912
+ "format": "DEFAULT",
2913
+ "state_key": null
2914
+ }
2915
+ ]
2916
+ },
2917
+ "language": {
2918
+ "delta_indices": [
2919
+ 0
2920
+ ],
2921
+ "modality_keys": [
2922
+ "annotation.human.action.task_description"
2923
+ ],
2924
+ "sin_cos_embedding_keys": null,
2925
+ "mean_std_embedding_keys": null,
2926
+ "action_configs": null
2927
+ }
2928
+ },
2929
+ "jaco_play": {
2930
+ "video": {
2931
+ "delta_indices": [
2932
+ -6,
2933
+ -4,
2934
+ -2,
2935
+ 0
2936
+ ],
2937
+ "modality_keys": [
2938
+ "primary",
2939
+ "wrist"
2940
+ ],
2941
+ "sin_cos_embedding_keys": null,
2942
+ "mean_std_embedding_keys": null,
2943
+ "action_configs": null
2944
+ },
2945
+ "state": {
2946
+ "delta_indices": [
2947
+ 0
2948
+ ],
2949
+ "modality_keys": [
2950
+ "end_effector_position",
2951
+ "end_effector_rotation",
2952
+ "gripper_position"
2953
+ ],
2954
+ "sin_cos_embedding_keys": null,
2955
+ "mean_std_embedding_keys": null,
2956
+ "action_configs": null
2957
+ },
2958
+ "action": {
2959
+ "delta_indices": [
2960
+ 0,
2961
+ 1,
2962
+ 2,
2963
+ 3,
2964
+ 4,
2965
+ 5,
2966
+ 6,
2967
+ 7,
2968
+ 8,
2969
+ 9,
2970
+ 10,
2971
+ 11,
2972
+ 12,
2973
+ 13,
2974
+ 14,
2975
+ 15
2976
+ ],
2977
+ "modality_keys": [
2978
+ "end_effector_position",
2979
+ "end_effector_rotation",
2980
+ "gripper_close"
2981
+ ],
2982
+ "sin_cos_embedding_keys": null,
2983
+ "mean_std_embedding_keys": null,
2984
+ "action_configs": [
2985
+ {
2986
+ "rep": "DELTA",
2987
+ "type": "EEF",
2988
+ "format": "DEFAULT",
2989
+ "state_key": null
2990
+ },
2991
+ {
2992
+ "rep": "DELTA",
2993
+ "type": "EEF",
2994
+ "format": "DEFAULT",
2995
+ "state_key": null
2996
+ },
2997
+ {
2998
+ "rep": "ABSOLUTE",
2999
+ "type": "NON_EEF",
3000
+ "format": "DEFAULT",
3001
+ "state_key": null
3002
+ }
3003
+ ]
3004
+ },
3005
+ "language": {
3006
+ "delta_indices": [
3007
+ 0
3008
+ ],
3009
+ "modality_keys": [
3010
+ "annotation.human.action.task_description"
3011
+ ],
3012
+ "sin_cos_embedding_keys": null,
3013
+ "mean_std_embedding_keys": null,
3014
+ "action_configs": null
3015
+ }
3016
+ }
3017
+ },
3018
+ "random_rotation_angle": null,
3019
+ "color_jitter_params": {
3020
+ "brightness": 0.3,
3021
+ "contrast": 0.4,
3022
+ "saturation": 0.5,
3023
+ "hue": 0.08
3024
+ },
3025
+ "model_name": "RLWRLD/RLDX-1-VLM",
3026
+ "model_type": "vtc_qwen3_vl",
3027
+ "formalize_language": true,
3028
+ "max_state_dim": 64,
3029
+ "max_action_dim": 64,
3030
+ "max_action_horizon": 16,
3031
+ "use_percentiles": true,
3032
+ "clip_outliers": true,
3033
+ "apply_sincos_state_encoding": false,
3034
+ "use_relative_action": true,
3035
+ "memory_length": 4,
3036
+ "general_embodiment_train_ratio": 0,
3037
+ "allow_missing_physics": true,
3038
+ "physics_keys": [
3039
+ "tactile",
3040
+ "torque"
3041
+ ],
3042
+ "physics_dims": [
3043
+ 15,
3044
+ 7
3045
+ ],
3046
+ "random_crop_fraction": 1.0
3047
+ }
3048
+ }
statistics.json ADDED
The diff for this file is too large to render. See raw diff
 
teaser.png ADDED

Git LFS Details

  • SHA256: 6b34b11f6c8e2699766e26aa210be9e4b3e5f3f8f45ed009ae5c7ef07c7c7cd7
  • Pointer size: 133 Bytes
  • Size of remote file: 10.4 MB