madokalif committed on Mar 14

Commit

1cd30e7

verified ·

1 Parent(s): 28a050c

Upload checkpoint-4500 (step 4500/16550)

Browse files

Files changed (17) hide show

.gitattributes +2 -0
checkpoint-4500/action_tokenizer.py +431 -0
checkpoint-4500/adapter_config.json +43 -0
checkpoint-4500/adapter_model.safetensors +3 -0
checkpoint-4500/global_step4499/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4499/mp_rank_00_model_states.pt +3 -0
checkpoint-4500/latest +1 -0
checkpoint-4500/preprocessor_config.json +28 -0
checkpoint-4500/processing_spatialvla.py +254 -0
checkpoint-4500/processor_config.json +3827 -0
checkpoint-4500/rng_state.pth +3 -0
checkpoint-4500/special_tokens_map.json +39 -0
checkpoint-4500/tokenizer.json +3 -0
checkpoint-4500/tokenizer_config.json +0 -0
checkpoint-4500/trainer_state.json +3 -0
checkpoint-4500/training_args.bin +3 -0
checkpoint-4500/zero_to_fp32.py +674 -0

.gitattributes CHANGED Viewed

@@ -43,3 +43,5 @@ checkpoint-3500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-3500/trainer_state.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-4000/trainer_state.json filter=lfs diff=lfs merge=lfs -text

 checkpoint-3500/trainer_state.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-4000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 checkpoint-4000/trainer_state.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-4500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+checkpoint-4500/trainer_state.json filter=lfs diff=lfs merge=lfs -text

checkpoint-4500/action_tokenizer.py ADDED Viewed

	@@ -0,0 +1,431 @@

+"""
+action_tokenizer.py
+Extension class; wraps base LLM/VLM tokenizer with logic to discretize and tokenize continuous robot actions.
+"""
+from typing import List, Union, Dict, Optional
+import numpy as np
+from transformers import PreTrainedTokenizerBase
+from scipy.stats import norm
+import torch
+ACTION_TOKEN = '<ACTION{:05d}>'
+class ActionTokenizer:
+    def __init__(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_bins: int = 256,
+        min_action: int = -1,
+        max_action: int = 1,
+    ):
+        self._vocab_size = num_bins
+        self.tokenizer = tokenizer
+        self.min_action, self.max_action = min_action, max_action
+        self.bin_centers = np.linspace(min_action, max_action, num_bins)
+        # add special action tokens to language tokenizer
+        token_list = [ACTION_TOKEN.format(i) for i in range(self._vocab_size)]
+        self.token_array = np.array(token_list)
+        num_new_tokens = self.tokenizer.add_tokens(token_list, special_tokens=True)
+        print(f"Add {num_new_tokens} TRANSLATION TOKENS, tokenizer vocab size {self.tokenizer.vocab_size} / {len(tokenizer)}")
+        self.action_token_begin_idx = self.token_start_idx = self.tokenizer.convert_tokens_to_ids(self.token_array[0])
+        self.token_end_idx = self.tokenizer.convert_tokens_to_ids(self.token_array[-1])
+    def __call__(self, action: np.ndarray) -> List[str]:
+        """Discretize continuous actions to tokens.
+        action: np.ndarray, (n, 7), continuous actions in Cartesian or Spherical coordinates.
+        return: np.ndarray, (n, 7), tokens.
+        """
+        action = np.clip(action, a_min=float(self.min_action), a_max=float(self.max_action))
+        ids = np.digitize(action, self.bin_centers, right=True)  # [0, 255]
+        return self.token_array[ids]
+    def decode_token_ids_to_actions(self, action_token_id: np.ndarray) -> np.ndarray:
+        """decode token ids to continuous actions.
+        action_token_id: np.ndarray, (n, 7), token ids.
+        return: np.ndarray, (n, 7), continuous actions
+        """
+        ids = action_token_id - self.action_token_begin_idx
+        ids = np.clip(ids, a_min=0, a_max=self._vocab_size - 1)
+        return self.bin_centers[ids]
+    @property
+    def vocab_size(self) -> int:
+        return self._vocab_size
+class TranslationTokenizer:
+    def __init__(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_bins: Dict,
+        bin_policy: Optional[Dict] = None,
+        use_spherical: bool = True,
+    ):
+        self.tokenizer = tokenizer
+        self.num_theta_bins = num_bins["theta_bins"]
+        self.num_phi_bins = num_bins["phi_bins"]
+        self.num_r_bins = num_bins["r_bins"]
+        self.use_spherical = use_spherical
+        # for indexing
+        self.NP = self.num_phi_bins * self.num_r_bins
+        # add special action tokens to language tokenizer
+        self._vocab_size = self.num_theta_bins * self.num_phi_bins * self.num_r_bins
+        token_list = [ACTION_TOKEN.format(i) for i in range(self._vocab_size)]
+        self.token_array = np.array(token_list)
+        num_new_tokens = self.tokenizer.add_tokens(token_list, special_tokens=True)
+        print(f"Add {num_new_tokens} TRANSLATION TOKENS, tokenizer vocab size {self.tokenizer.vocab_size} / {len(tokenizer)}")
+        self.token_start_idx = self.tokenizer.convert_tokens_to_ids(self.token_array[0])
+        self.token_end_idx = self.tokenizer.convert_tokens_to_ids(self.token_array[-1])
+        self.set_bins(bin_policy)
+    def set_bins(self, bin_policy):
+        self.theta_bins = np.array(bin_policy["theta_bins"])
+        self.phi_bins = np.array(bin_policy["phi_bins"])
+        self.r_bins = np.array(bin_policy["r_bins"])
+    def cartesian_to_spherical(self, x, y, z):
+        theta = np.arctan2(np.sqrt(x**2 + y**2), z)  # polar angle
+        phi = np.arctan2(y, x)  # azimuthal angle
+        r = np.sqrt(x**2 + y**2 + z**2)
+        return theta, phi, r
+    def spherical_to_cartesian(self, theta, phi, r):
+        x = r * np.sin(theta) * np.cos(phi)
+        y = r * np.sin(theta) * np.sin(phi)
+        z = r * np.cos(theta)
+        return x, y, z
+    def __call__(self, action: np.ndarray) -> List[str]:
+        """Discretize continuous actions to tokens.
+        action: np.ndarray, (n, 3), continuous actions in Cartesian or Spherical coordinates.
+        return: np.ndarray, (n,), tokens.
+        """
+        if self.use_spherical:
+            theta, phi, r = self.cartesian_to_spherical(action[:, 0], action[:, 1], action[:, 2])
+        else:
+            theta, phi, r = action[:, 0], action[:, 1], action[:, 2]
+        disc_theta = np.digitize(theta, self.theta_bins[1:-1]) # b
+        disc_phi = np.digitize(phi, self.phi_bins[1:-1])
+        disc_r = np.digitize(r, self.r_bins[1:-1])
+        ids = disc_theta * self.NP + disc_phi * self.num_r_bins + disc_r
+        return self.token_array[ids]
+    def decode_token_ids_to_actions(self, action_token_id: np.ndarray) -> np.ndarray:
+        """decode token ids to continuous actions.
+        action_token_id: np.ndarray, (n,), token ids.
+        return: np.ndarray, (n, 3), continuous actions
+        """
+        action_token_id = np.clip(action_token_id, self.token_start_idx, self.token_end_idx)
+        ids = action_token_id - self.token_start_idx
+        disc_theta, disc_phi, disc_r = ids // self.NP, (ids % self.NP) // self.num_r_bins, ids % self.num_r_bins
+        theta = 0.5 * (self.theta_bins[disc_theta] + self.theta_bins[disc_theta + 1])
+        phi = 0.5 * (self.phi_bins[disc_phi] + self.phi_bins[disc_phi + 1])
+        r = 0.5 * (self.r_bins[disc_r] + self.r_bins[disc_r + 1])
+        # clip action to [-1, 1], due to the spherical coordinate action space is the circumscribed sphere of the Cartesian action space.
+        x, y, z = self.spherical_to_cartesian(theta, phi, r) if self.use_spherical else (theta, phi, r)
+        x, y, z = np.clip([x, y, z], -1, 1)
+        return np.stack((x, y, z), axis=1)
+    @property
+    def vocab_size(self) -> int:
+        return self._vocab_size
+class RotationTokenizer:
+    def __init__(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_bins: Dict,
+        bin_policy: Optional[Dict] = None,
+        array_begin_idx=None,
+    ):
+        self.tokenizer = tokenizer
+        self.num_roll_bins = num_bins["roll_bins"] # M
+        self.num_pitch_bins = num_bins["pitch_bins"] # N
+        self.num_yaw_bins = num_bins["yaw_bins"] # P
+        self.array_begin_idx = array_begin_idx
+        # for indexing
+        self.NP = self.num_pitch_bins * self.num_yaw_bins
+        # add special action tokens to language tokenizer
+        self._vocab_size = self.num_roll_bins * self.num_pitch_bins * self.num_yaw_bins
+        token_list = [ACTION_TOKEN.format(i + self.array_begin_idx) for i in range(self._vocab_size)]
+        self.token_array = np.array(token_list)
+        num_new_tokens = self.tokenizer.add_tokens(token_list, special_tokens=True)
+        print(f"Add {num_new_tokens} ROTATION TOKENS to tokenizer, tokenizer vocab size {self.tokenizer.vocab_size} / {len(tokenizer)}")
+        self.token_start_idx = self.tokenizer.convert_tokens_to_ids(self.token_array[0])
+        self.token_end_idx = self.tokenizer.convert_tokens_to_ids(self.token_array[-1])
+        self.set_bins(bin_policy)
+    def set_bins(self, bin_policy):
+        self.roll_bins = np.array(bin_policy["roll_bins"])
+        self.pitch_bins = np.array(bin_policy["pitch_bins"])
+        self.yaw_bins = np.array(bin_policy["yaw_bins"])
+    def __call__(self, action: np.ndarray) -> List[str]:
+        """Discretize continuous actions to tokens.
+        action: np.ndarray, (n, 3), continuous actions in Cartesian or Spherical coordinates.
+        return: np.ndarray, (n,), tokens.
+        """
+        roll, pitch, yaw = action[:, 0], action[:, 1], action[:, 2]
+        disc_roll = np.clip(np.digitize(roll, self.roll_bins) - 1, 0, self.num_roll_bins - 1)
+        disc_pitch = np.clip(np.digitize(pitch, self.pitch_bins) - 1, 0, self.num_pitch_bins - 1)
+        disc_yaw = np.clip(np.digitize(yaw, self.yaw_bins) - 1, 0, self.num_yaw_bins - 1)
+        ids = disc_roll * self.NP + disc_pitch * self.num_yaw_bins + disc_yaw
+        return self.token_array[ids]
+    def decode_token_ids_to_actions(self, action_token_id: Union[np.int64, np.ndarray]) -> np.ndarray:
+        """decode token ids to continuous actions.
+        action_token_id: np.ndarray, (n,), token ids.
+        return: np.ndarray, (n, 3), continuous actions
+        """
+        action_token_id = np.clip(action_token_id, a_min=self.token_start_idx, a_max=self.token_end_idx)
+        ids = action_token_id - self.token_start_idx
+        disc_roll, disc_pitch, disc_yaw = ids // self.NP, (ids % self.NP) // self.num_yaw_bins, ids % self.num_yaw_bins
+        roll = 0.5 * (self.roll_bins[disc_roll] + self.roll_bins[disc_roll + 1])
+        pitch = 0.5 * (self.pitch_bins[disc_pitch] + self.pitch_bins[disc_pitch + 1])
+        yaw = 0.5 * (self.yaw_bins[disc_yaw] + self.yaw_bins[disc_yaw + 1])
+        return np.stack((roll, pitch, yaw), axis=1)
+    @property
+    def vocab_size(self) -> int:
+        return self._vocab_size
+class GripperTokenzier:
+    def __init__(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_bins: int = 2,
+        array_begin_idx = None,
+    ) -> None:
+        self.tokenizer = tokenizer
+        self.num_bins = num_bins
+        self.array_begin_idx = array_begin_idx
+        token_list = [ACTION_TOKEN.format(i + self.array_begin_idx) for i in range(self.num_bins)]
+        self.token_array = np.array(token_list)
+        num_new_tokens = self.tokenizer.add_tokens(token_list, special_tokens=True)
+        print(f"Add {num_new_tokens} GRIPPER TOKENS to tokenizer, tokenizer vocab size {self.tokenizer.vocab_size} / {len(tokenizer)}")
+        self.token_start_idx = self.tokenizer.convert_tokens_to_ids(self.token_array[0])
+        self.token_end_idx = self.tokenizer.convert_tokens_to_ids(self.token_array[-1])
+    def __call__(self, action: np.ndarray) -> List[str]:
+        """Discretize continuous actions to tokens.
+        action: np.ndarray, (n,), continuous actions in Cartesian or Spherical coordinates.
+        return: np.ndarray, (n,), tokens.
+        """
+        ids = np.where(action >= 0.5, 1, 0)
+        return self.token_array[ids]
+    def decode_token_ids_to_actions(self, action_token_id: np.ndarray) -> np.ndarray:
+        """decode token ids to continuous actions.
+        action_token_id: np.ndarray, (n,), token ids.
+        return: np.ndarray, (n, 1), continuous actions
+        """
+        action_token_id = np.clip(action_token_id, self.token_start_idx, self.token_end_idx)
+        ids = action_token_id - self.token_start_idx
+        actions = np.where(ids == 0, 0., 1.)
+        return actions[:, None]
+    @property
+    def vocab_size(self) -> int:
+        return self.num_bins
+class SpatialActionTokenizer:
+    range_bins = {
+        "translation": {
+            "theta_bins": (0.0, np.pi),
+            "phi_bins": (-np.pi, np.pi),
+            "r_bins": (0.0, np.sqrt(3)),
+        },
+        "rotation": {
+            "roll_bins": (-1.0, 1.0),
+            "pitch_bins": (-1.0, 1.0),
+            "yaw_bins": (-1.0, 1.0),
+        },
+    }
+    def __init__(
+        self,
+        tokenizer: PreTrainedTokenizerBase,
+        num_bins: Dict,
+        gs_params: Dict = None,
+        bin_policy: Dict = None,
+        use_spherical: bool = True,
+        min_sigma: float = 0.0,
+        min_action: float = -1.0,
+        max_action: float = 1.0,
+    ):
+        """set bin_policy if exist, otherwise, caculate bin_policy from gs_params or use uniform bin grids.
+        gs_params: Optional[Dict],
+        bin_policy: Optional[Dict],
+        """
+        self.tokenizer = tokenizer
+        self.min_action, self.max_action = min_action, max_action
+        self.num_bins = num_bins
+        self.min_sigma = min_sigma
+        # set bin policy
+        self.bin_policy = bin_policy if bin_policy else self.get_bin_policy(gs_params, self.min_sigma)
+        self.translation_tokenizer = TranslationTokenizer(
+            self.tokenizer,
+            self.num_bins["translation"],
+            self.bin_policy["translation"],
+            use_spherical=use_spherical
+        )
+        self.rotation_tokenizer = RotationTokenizer(
+            self.tokenizer,
+            self.num_bins["rotation"],
+            self.bin_policy["rotation"],
+            array_begin_idx=self.translation_tokenizer.vocab_size,
+        )
+        self.gripper_tokenizer = GripperTokenzier(
+            self.tokenizer,
+            self.num_bins["gripper"],
+            array_begin_idx=self.translation_tokenizer.vocab_size + self.rotation_tokenizer.vocab_size
+        )
+        self._vocab_size = self.translation_tokenizer.vocab_size + self.rotation_tokenizer.vocab_size + self.gripper_tokenizer.vocab_size
+    def __call__(self, action: np.ndarray) -> List[str]:
+        """Discretize continuous actions to tokens.
+        action: np.ndarray, (n, 7), continuous actions in Cartesian coordinates.
+        return: np.ndarray, (n, 3), tokens.
+        """
+        if len(action.shape) == 1:
+            assert action.shape[0] == 7, f"action dim mismatch, got action shape: {action.shape}"
+            action = action.reshape(1, 7)
+        assert action.shape[1] == 7, f"action dim mismatch, got action shape: {action.shape}"
+        action = np.clip(action, a_min=self.min_action, a_max=self.max_action)
+        trans_tokens = self.translation_tokenizer(action[:, :3]) # (n,)
+        rot_tokens = self.rotation_tokenizer(action[:, 3:6]) # (n,)
+        grip_tokens = self.gripper_tokenizer(action[:, 6]) # (n,)
+        return np.stack((trans_tokens, rot_tokens, grip_tokens), axis=1) # (n, 3)
+    def decode_token_ids_to_actions(self, action_token_ids: np.ndarray) -> np.ndarray:
+        """decode token ids to continuous actions.
+        action_token_ids: np.ndarray, (n, 3), token ids.
+        """
+        if len(action_token_ids.shape) == 1:
+            assert action_token_ids.shape[0] == 3, f"action token id numbers mismatich, need 3 got {action_token_ids.shape[0]}"
+            action_token_ids = action_token_ids.reshape(1, 3)
+        assert action_token_ids.shape[1] == 3, f"token id numbers mismatich, need 3 got {action_token_ids.shape[1]}"
+        trans_action = self.translation_tokenizer.decode_token_ids_to_actions(action_token_ids[:, 0]) # (n, 3)
+        rot_action = self.rotation_tokenizer.decode_token_ids_to_actions(action_token_ids[:, 1]) # (n, 3)
+        grip_action = self.gripper_tokenizer.decode_token_ids_to_actions(action_token_ids[:, 2]) # (n, 1)
+        return np.concatenate((trans_action, rot_action, grip_action), axis=1) # (n, 7)
+    @property
+    def vocab_size(self) -> int:
+        return self._vocab_size
+    @property
+    def action_token_begin_idx(self) -> int:
+        return self.translation_tokenizer.token_start_idx
+    def get_bin_policy(self, gs_params=None, min_sigma=0.0):
+        bin_policy = {
+            "translation": {"theta_bins": None, "phi_bins": None, "r_bins": None},
+            "rotation": {"roll_bins": None, "pitch_bins": None, "yaw_bins": None}
+        }
+        if gs_params is None:
+            for bin_type in self.range_bins.keys():
+                for bin_key in self.range_bins[bin_type].keys():
+                    bin_policy[bin_type][bin_key] = np.linspace(*self.range_bins[bin_type][bin_key], self.num_bins[bin_type][bin_key] + 1)
+            print(f"use unifrom bin grids ... \n{bin_policy}")
+        else:
+            for bin_type in self.range_bins.keys():
+                for bin_key in self.range_bins[bin_type].keys():
+                    mu = gs_params[bin_key.split("_")[0].lower()]["mu"]
+                    sigma = max(gs_params[bin_key.split("_")[0].lower()]["sigma"], min_sigma)
+                    bin_bound_prob = np.linspace(
+                        norm.cdf(self.range_bins[bin_type][bin_key][0], loc=mu, scale=sigma),
+                        norm.cdf(self.range_bins[bin_type][bin_key][1], loc=mu, scale=sigma),
+                        self.num_bins[bin_type][bin_key] + 1,
+                    )
+                    bin_boundary = norm.ppf(bin_bound_prob, loc=mu, scale=sigma)
+                    bin_policy[bin_type][bin_key] = np.clip(
+                            bin_boundary,
+                            self.range_bins[bin_type][bin_key][0],
+                            self.range_bins[bin_type][bin_key][1],
+                        ).tolist() # for serialize
+            print(f"caculate bin grids from gaussians \n{bin_policy}")
+        return bin_policy
+    def get_norm_meshgrid(self, bin_policy):
+        grids = []
+        policy = {k1: {k2: np.array(v2) for k2, v2 in v1.items()} for k1, v1 in bin_policy.items()}
+        # NOTE: use unify k,v order of range_bins (tpr, rpy)
+        for bin_type in self.range_bins.keys():
+            bounds = []
+            for bin_key in self.range_bins[bin_type].keys():
+                minb, maxb = self.range_bins[bin_type][bin_key][0], self.range_bins[bin_type][bin_key][1]
+                bin_boundary = policy[bin_type][bin_key]
+                bin_center = (bin_boundary[:-1] + bin_boundary[1:]) / 2
+                bin_center = np.concatenate([np.array([minb]),bin_center,np.array([maxb])]) # padding
+                bin_center = (bin_center - minb) /  (maxb - minb) # nomalize (m, n, k)
+                bounds.append(bin_center)
+            # generate grids
+            grid_x, grid_y, grid_z = np.meshgrid(*bounds)
+            grids += [np.stack([grid_x, grid_y, grid_z], -1).reshape(-1, 3)]
+        return grids[0], grids[1] # (N, 3)
+    def spatial_embedding_adaption(self, gs_params, embeddings: torch.nn.Embedding, min_sigma=0.0, adpt_feature=False):
+        """
+        gs_params0, gs_params1: Dict
+        embeddings: tensor (S,E)
+        """
+        from scipy.interpolate import griddata
+        new_policy = self.get_bin_policy(gs_params, min_sigma=min_sigma)
+        trans_grids0, rot_grids0 = self.get_norm_meshgrid(self.bin_policy)
+        trans_grids1, rot_grids1 = self.get_norm_meshgrid(new_policy)
+        print("overwrite bin policy and tokenizer bins ...")
+        self.bin_policy = new_policy
+        self.min_sigma = min_sigma
+        self.translation_tokenizer.set_bins(new_policy["translation"])
+        self.rotation_tokenizer.set_bins(new_policy["rotation"])
+        if adpt_feature:
+            emb_data = embeddings.weight.data # (S, e)
+            _, E = emb_data.shape
+            # translation
+            m, n, k = (self.num_bins["translation"][k] for k in ["theta_bins", "phi_bins", "r_bins"])
+            N = m*n*k
+            trans_emb_data = emb_data[:N,].reshape(m, n, k, -1).permute(3, 0, 1, 2) # (e, m, n, k)
+            pad_emb = torch.nn.functional.pad(trans_emb_data, (1, 1, 1, 1, 1, 1), "replicate").permute(1, 2, 3, 0).reshape(-1, E)
+            adpt_trans_emb = griddata(trans_grids0, pad_emb.float(), trans_grids1, method='linear')
+            adpt_trans_emb = adpt_trans_emb.reshape(m+2, n+2, k+2, E)[1:-1, 1:-1, 1:-1,]
+            # rotation
+            m1, n1, k1 = (self.num_bins["rotation"][k] for k in ["roll_bins", "pitch_bins", "yaw_bins"])
+            M = m1*n1*k1
+            rot_emb_data = emb_data[N : N + M,].reshape(m1, n1, k1, -1).permute(3, 0, 1, 2) # (e, m, n, k)
+            pad_emb = torch.nn.functional.pad(rot_emb_data, (1, 1, 1, 1, 1, 1), "replicate").permute(1, 2, 3, 0).reshape(-1, E)
+            adpt_rot_emb = griddata(rot_grids0, pad_emb.float(), rot_grids1, method='linear')
+            adpt_rot_emb = adpt_rot_emb.reshape(m1+2, n1+2, k1+2, E)[1:-1, 1:-1, 1:-1,]
+            # set data
+            device, dtype = embeddings.weight.data.device, embeddings.weight.data.dtype
+            embeddings.weight.data[:N] = torch.Tensor(adpt_trans_emb.reshape(-1, E), device=device).to(dtype)
+            embeddings.weight.data[N:N+M] = torch.Tensor(adpt_rot_emb.reshape(-1, E), device=device).to(dtype)
+            print("DONE! adapt spatial embedding to new gaussian distributation finished.")
+            print(embeddings.weight.data)

checkpoint-4500/adapter_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "IPEC-COMMUNITY/spatialvla-4b-224-pt",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": "gaussian",
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.0,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [],
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "fc1",
+    "up_proj",
+    "linear",
+    "position_embedding_head.0",
+    "o_proj",
+    "k_proj",
+    "q_proj",
+    "fc2",
+    "out_proj",
+    "position_embedding_head.3",
+    "gate_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-4500/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d58ff8bca30ca305375721fbcaab299fc732bf7f6f328726a0bd6bfc7cbcc11d
+size 118478120

checkpoint-4500/global_step4499/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4f0d2ab40b2872a2c25686d6ffa6927cd1c00e5f9340d6adf3b423068e4b739
+size 710309328

checkpoint-4500/global_step4499/mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:053856c95790d845b489fe613dffce8fc70a90e38f0c6226b3d998ad6e265f11
+size 119108780

checkpoint-4500/latest ADDED Viewed

	@@ -0,0 +1 @@


1	+ global_step4499

checkpoint-4500/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "auto_map": {
+    "AutoProcessor": "processing_spatialvla.SpatialVLAProcessor"
+  },
+  "do_convert_rgb": null,
+  "do_normalize": false,
+  "do_rescale": true,
+  "do_resize": true,
+  "image_mean": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "image_processor_type": "SiglipImageProcessor",
+  "image_seq_length": 256,
+  "image_std": [
+    0.5,
+    0.5,
+    0.5
+  ],
+  "processor_class": "SpatialVLAProcessor",
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "height": 224,
+    "width": 224
+  }
+}

checkpoint-4500/processing_spatialvla.py ADDED Viewed

	@@ -0,0 +1,254 @@

+# coding=utf-8
+# Copyright 2024 The HuggingFace Inc. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import logging
+from typing import List, Optional, Union, Dict
+import numpy as np
+import torch
+from transformers.feature_extraction_utils import BatchFeature
+from transformers.image_utils import ImageInput, is_valid_image
+from transformers.processing_utils import Unpack, _validate_images_text_input_order, ProcessorMixin
+from transformers.tokenization_utils_base import AddedToken, PreTokenizedInput, TextInput
+from transformers.utils import logging
+from transformers.models.paligemma.processing_paligemma import (
+    make_batched_images,
+    build_string_from_input,
+    _is_str_or_image,
+    PaliGemmaProcessorKwargs,
+    IMAGE_TOKEN,
+    EXTRA_TOKENS
+)
+from .action_tokenizer import SpatialActionTokenizer
+logger = logging.get_logger(__name__)
+class SpatialVLAProcessor(ProcessorMixin):
+    attributes = ["image_processor", "tokenizer"]
+    valid_kwargs = ["chat_template"]
+    image_processor_class = "SiglipImageProcessor"
+    tokenizer_class = ("GemmaTokenizer", "GemmaTokenizerFast")
+    def __init__(
+        self,
+        image_processor=None,
+        tokenizer=None,
+        chat_template=None,
+        statistics: Optional[dict] = None,
+        bin_policy=None,
+        intrinsic_config=None,
+        action_config=None,
+        num_obs_steps=1,
+        obs_delta=1,
+        action_chunk_size=1,
+        min_sigma=0.0,
+        **kwargs,
+    ):
+        if image_processor is None:
+            raise ValueError("You need to specify an `image_processor`.")
+        if tokenizer is None:
+            raise ValueError("You need to specify a `tokenizer`.")
+        if not hasattr(image_processor, "image_seq_length"):
+            raise ValueError("Image processor is missing an `image_seq_length` attribute.")
+        self.image_seq_length = image_processor.image_seq_length
+        if not hasattr(tokenizer, "image_token"):
+            image_token = AddedToken(IMAGE_TOKEN, normalized=False, special=True)
+            tokens_to_add = {"additional_special_tokens": [image_token]}
+            tokenizer.add_special_tokens(tokens_to_add)
+            self.image_token_id = tokenizer.convert_tokens_to_ids(IMAGE_TOKEN)
+        else:
+            self.image_token_id = tokenizer.image_token_id
+        tokenizer.add_tokens(EXTRA_TOKENS)
+        tokenizer.add_bos_token = False
+        tokenizer.add_eos_token = False
+        super().__init__(image_processor, tokenizer, chat_template=chat_template)
+        # action tokenizer
+        self.statistics = statistics if statistics else {}
+        self.bin_policy = bin_policy
+        self.min_sigma = min_sigma
+        self.intrinsic_config = intrinsic_config
+        self.action_config = action_config
+        self.num_obs_steps = num_obs_steps
+        self.obs_delta = obs_delta
+        self.action_chunk_size = action_chunk_size
+        self.dataset_intrinsics = {}
+        height, width = image_processor.size["height"], image_processor.size["width"]
+        # scale intrinsic matrix
+        for k, v in intrinsic_config.items():
+            K = torch.tensor(v["intrinsic"]).float()
+            K[:2] *= torch.tensor([width / v["width"], height / v["height"]])[:, None]
+            self.dataset_intrinsics[k] = K
+        self.action_tokenizer = SpatialActionTokenizer(
+            tokenizer=tokenizer, num_bins=action_config["num_bins"],
+            bin_policy=bin_policy, use_spherical=action_config["use_spherical"],
+            min_sigma=min_sigma,
+        )
+    def __call__(
+        self,
+        images: ImageInput = None,
+        text: Union[TextInput, PreTokenizedInput, List[TextInput], List[PreTokenizedInput]] = None,
+        unnorm_key: Optional[str] = None,
+        suffix_actions: Optional[np.array] = None, # (t e)
+        **kwargs: Unpack[PaliGemmaProcessorKwargs],
+    ) -> BatchFeature:
+        """Build model inputs from images, a text prompt and optional target actions.
+        images: image, list of images or list of lists of images; required.
+        text: prompt or list of prompts; a missing prompt is replaced by "".
+        unnorm_key: key used to select the camera intrinsic; unknown keys fall back to "default".
+        suffix_actions: optional continuous actions; when given they are tokenized by the action
+            tokenizer and the joined tokens become the suffix, otherwise the suffix is taken from
+            the text kwargs.
+        return: BatchFeature with the tokenizer outputs, "pixel_values", "intrinsic" and, when a
+            suffix is present, "labels".
+        Raises ValueError if images are missing, have an unsupported structure, or do not match
+        the number of prompts.
+        """
+        images, text = _validate_images_text_input_order(images, text)
+        output_kwargs = self._merge_kwargs(
+            PaliGemmaProcessorKwargs,
+            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
+            **kwargs,
+        )
+        if suffix_actions is not None:
+            action_tokens = self.action_tokenizer(suffix_actions) # (n,3)
+            # all action tokens are concatenated into one suffix string
+            suffix="".join(action_tokens.flatten())
+        else:
+            suffix = output_kwargs["text_kwargs"].pop("suffix", None)
+        # token type ids are only requested when there is a suffix; they are used for the labels below
+        return_token_type_ids = True if suffix is not None else False
+        if images is None:
+            raise ValueError("`images` are expected as arguments to a `PaliGemmaProcessor` instance.")
+        if text is None:
+            logger.warning_once( "You are using PaliGemma without a text prefix. It will perform as a picture-captioning model.")
+            text = ""
+        if _is_str_or_image(text):
+            text = [text]
+        elif isinstance(text, list) and _is_str_or_image(text[0]):
+            pass
+        if text is not None and images is not None:
+            # case 1: no prompt contains the image token; the prompt strings are built by build_string_from_input
+            if not any(IMAGE_TOKEN in sample for sample in text):
+                if isinstance(text, List) and isinstance(images, List):
+                    if len(images) != len(text):
+                        raise ValueError(
+                            f"Received {len(images)} images for {len(text)} prompts. Each prompt should be associated with an image or list of images."
+                        )
+                # normalize images to a list (per prompt) of lists of images
+                if is_valid_image(images):
+                    images = [[images]]
+                elif isinstance(images, list) and is_valid_image(images[0]):
+                    images = [[image] for image in images]
+                elif not (isinstance(images, list) and isinstance(images[0], list) and is_valid_image(images[0][0])):
+                    raise ValueError("images must be an image, list of images or list of list of images")
+                # NOTE(review): the suffix is wrapped in a list and terminated with EOS only in this branch,
+                # not in the branch below that handles prompts already containing the image token.
+                if suffix is not None and _is_str_or_image(suffix): suffix = [suffix]
+                if suffix is not None: suffix = [sfx + self.tokenizer.eos_token for sfx in suffix]
+                input_strings = [
+                    build_string_from_input(
+                        prompt=prompt,
+                        bos_token=self.tokenizer.bos_token,
+                        image_seq_len=self.image_seq_length,
+                        image_token=IMAGE_TOKEN,
+                        num_images=len(image_list) if isinstance(image_list, list) else 1,
+                    )
+                    for prompt, image_list in zip(text, images)
+                ]
+                images = make_batched_images(images)
+            # case 2: prompts already contain the image token; expand each occurrence in place
+            else:
+                expanded_samples = []
+                for sample in text:
+                    # every image token is repeated image_seq_length times
+                    expanded_sample = sample.replace(IMAGE_TOKEN, IMAGE_TOKEN * self.image_seq_length)
+                    # BOS goes right after the last image token, or at the start if there is none
+                    bos_rfind_index = expanded_sample.rfind(IMAGE_TOKEN)
+                    bos_index = bos_rfind_index + len(IMAGE_TOKEN) if bos_rfind_index != -1 else 0
+                    expanded_sample = (
+                        expanded_sample[:bos_index] + self.tokenizer.bos_token + expanded_sample[bos_index:]
+                    )
+                    expanded_samples.append(expanded_sample)
+                input_strings = [f"{sample}\n" for sample in expanded_samples]
+        pixel_values = self.image_processor(images, **output_kwargs["images_kwargs"])["pixel_values"]
+        # a user-supplied max_length is extended by the number of image tokens
+        if output_kwargs["text_kwargs"].get("max_length", None) is not None:
+            output_kwargs["text_kwargs"]["max_length"] += self.image_seq_length
+        inputs = self.tokenizer(
+            input_strings,
+            text_pair=suffix,
+            return_token_type_ids=return_token_type_ids,
+            **output_kwargs["text_kwargs"],
+        )
+        # NOTE(review): raises KeyError if unnorm_key is unknown and no "default" intrinsic is configured
+        intrinsic = self.dataset_intrinsics[unnorm_key] if unnorm_key in self.dataset_intrinsics else self.dataset_intrinsics["default"]
+        return_data = {**inputs, "pixel_values": pixel_values, "intrinsic": intrinsic}
+        if return_token_type_ids:
+            # labels: input ids with every position whose token type id is 0 replaced by -100
+            # NOTE(review): masked_fill presumably requires tensor outputs (e.g. return_tensors="pt") - confirm
+            labels = inputs["input_ids"].masked_fill(inputs["token_type_ids"] == 0, -100)
+            return_data.update({"labels": labels})
+        return BatchFeature(data=return_data)
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.batch_decode with CLIP->Gemma
+    def batch_decode(self, *args, **kwargs):
+        """
+        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
+        refer to the docstring of this method for more information.
+        """
+        return self.tokenizer.batch_decode(*args, **kwargs)
+    # Copied from transformers.models.clip.processing_clip.CLIPProcessor.decode with CLIP->Gemma
+    def decode(self, *args, **kwargs):
+        """
+        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
+        the docstring of this method for more information.
+        """
+        return self.tokenizer.decode(*args, **kwargs)
+    @property
+    def model_input_names(self):
+        tokenizer_input_names = self.tokenizer.model_input_names
+        image_processor_input_names = self.image_processor.model_input_names
+        return list(dict.fromkeys(tokenizer_input_names + image_processor_input_names))
+    def decode_actions(
+        self,
+        generation_outputs: torch.Tensor,
+        unnorm_key: Optional[str] = None,
+    ) -> Dict[str, torch.Tensor]:
+        action_token_num = 3  # translation + rotation + gripper
+        predicted_action_token_ids = generation_outputs[0, : action_token_num * self.action_chunk_size].detach().cpu().long().numpy()
+        assert self.tokenizer.eos_token != predicted_action_token_ids[-1], "[error] actions contain EOS token, please check you truncation settings!"
+        if predicted_action_token_ids.shape[0] < action_token_num * self.action_chunk_size:  # pad with zeros
+            logger.warning(f"Padding zero action!")
+            predicted_action_token_ids = np.concatenate(
+                [
+                    predicted_action_token_ids,
+                    np.zeros(action_token_num * self.action_chunk_size - predicted_action_token_ids.shape[0], dtype=np.longlong),
+                ]
+            )
+        predicted_action_token_ids = predicted_action_token_ids.reshape(-1, action_token_num)
+        normalized_action_chunks = self.action_tokenizer.decode_token_ids_to_actions(predicted_action_token_ids)
+        if unnorm_key is None:
+            logger.warning(f"unnorm_key {unnorm_key} is not in statistics, use next one")
+            unnorm_key = next(self.statistics.keys())
+        action_norm_stats = self.statistics[unnorm_key]["action"]
+        action_dim = len(action_norm_stats["q01"])
+        mask = np.array(action_norm_stats.get("mask", np.ones(action_dim)), dtype=bool)
+        action_high, action_low = np.array(action_norm_stats["q99"]), np.array(action_norm_stats["q01"])
+        actions = []
+        for normalized_actions in normalized_action_chunks:
+            action = np.where(
+                mask,
+                0.5 * (normalized_actions + 1) * (action_high - action_low) + action_low,
+                normalized_actions,
+            )
+            actions.append(action)
+        actions = np.stack(actions)
+        return {"actions": actions, "action_ids": predicted_action_token_ids}

checkpoint-4500/processor_config.json ADDED Viewed

	@@ -0,0 +1,3827 @@

+{
+  "action_chunk_size": 4,
+  "action_config": {
+    "distribution": "gaussian",
+    "num_bins": {
+      "gripper": 2,
+      "rotation": {
+        "pitch_bins": 16,
+        "roll_bins": 16,
+        "yaw_bins": 16
+      },
+      "total": 8194,
+      "translation": {
+        "phi_bins": 32,
+        "r_bins": 8,
+        "theta_bins": 16
+      }
+    },
+    "use_spherical": true
+  },
+  "auto_map": {
+    "AutoProcessor": "processing_spatialvla.SpatialVLAProcessor"
+  },
+  "bin_policy": {
+    "rotation": {
+      "pitch_bins": [
+        -1.0,
+        -0.6785015894338633,
+        -0.516796358161167,
+        -0.3978678314258641,
+        -0.29907867426319246,
+        -0.21158608510441518,
+        -0.13081651669135252,
+        -0.05392877158612959,
+        0.02113881590329744,
+        0.0961313749999302,
+        0.17278161860263358,
+        0.25310821063971767,
+        0.33985580585203445,
+        0.4373796767941653,
+        0.5539451994131283,
+        0.7100308525313351,
+        0.9999999999999999
+      ],
+      "roll_bins": [
+        -1.0,
+        -0.7121298287894609,
+        -0.5564581819056097,
+        -0.440071773405789,
+        -0.3426461358467384,
+        -0.25595819395001274,
+        -0.17566893098554964,
+        -0.09904102149491184,
+        -0.024059205927849478,
+        0.05100802578115137,
+        0.12790631705350436,
+        0.20869987492610076,
+        0.2962359118858219,
+        0.3951018734752948,
+        0.5141779624401348,
+        0.6762450862353777,
+        1.0
+      ],
+      "yaw_bins": [
+        -1.0,
+        -0.6910047644696934,
+        -0.5313988287371314,
+        -0.4133376866679583,
+        -0.3150057290436059,
+        -0.22777658299365705,
+        -0.14715771012527992,
+        -0.07034330907230311,
+        0.004712965738136004,
+        0.07975252682496348,
+        0.15651401950954372,
+        0.23703420508371892,
+        0.32409736463921823,
+        0.4221473708283458,
+        0.5396818128475004,
+        0.6980345545587262,
+        1.0
+      ]
+    },
+    "translation": {
+      "phi_bins": [
+        -3.1415926535897927,
+        -2.5597806593194092,
+        -2.1899702111786126,
+        -1.9071489188814448,
+        -1.6724463283141142,
+        -1.4683467869586326,
+        -1.2853487663890668,
+        -1.1176672338183495,
+        -0.961484031585327,
+        -0.8141204989748655,
+        -0.6736024210639718,
+        -0.5384120746595923,
+        -0.40733740832383114,
+        -0.279375002438531,
+        -0.15366425283265983,
+        -0.029440234757304742,
+        0.0940021938080639,
+        0.2173378027339352,
+        0.34123726674747146,
+        0.46639302836823826,
+        0.5935473848733163,
+        0.7235258808185444,
+        0.857280204661428,
+        0.9959469801163238,
+        1.1409329906705301,
+        1.2940454053271015,
+        1.4577019170652383,
+        1.6352913749303837,
+        1.8318407243899377,
+        2.0553733807372363,
+        2.320069275631962,
+        2.6552436426949604,
+        3.141592653589793
+      ],
+      "r_bins": [
+        2.220446049250313e-16,
+        0.19677118231539265,
+        0.3506298590504556,
+        0.4881976731379496,
+        0.621970275186659,
+        0.7620978861167458,
+        0.9228346010157172,
+        1.1393317208802278,
+        1.7320508075688767
+      ],
+      "theta_bins": [
+        0.0,
+        0.7067187338585303,
+        0.9814199309359143,
+        1.1752042640550222,
+        1.3331175751173345,
+        1.4713205387280388,
+        1.5977846301055496,
+        1.7172771763957553,
+        1.8331248472067783,
+        1.9480194771467687,
+        2.0644993054216925,
+        2.1853608246107656,
+        2.314189357400805,
+        2.456314355008026,
+        2.621028843347318,
+        2.828352346005421,
+        3.141592653589793
+      ]
+    }
+  },
+  "intrinsic_config": {
+    "bridge_orig/1.0.0": {
+      "height": 480,
+      "intrinsic": [
+        [
+          623.588,
+          0,
+          319.501
+        ],
+        [
+          0,
+          623.588,
+          239.545
+        ],
+        [
+          0,
+          0,
+          1
+        ]
+      ],
+      "width": 640
+    },
+    "default": {
+      "height": 480,
+      "intrinsic": [
+        [
+          623.588,
+          0,
+          319.501
+        ],
+        [
+          0,
+          623.588,
+          239.545
+        ],
+        [
+          0,
+          0,
+          1
+        ]
+      ],
+      "width": 640
+    }
+  },
+  "min_sigma": 0.0,
+  "num_obs_steps": 1,
+  "obs_delta": 1,
+  "processor_class": "SpatialVLAProcessor",
+  "statistics": {
+    "austin_buds_dataset_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          1.0,
+          1.0,
+          1.0,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "mean": [
+          -0.07678329944610596,
+          0.0036849123425781727,
+          0.05644941329956055,
+          0.0,
+          0.0,
+          0.0,
+          0.3510494828224182
+        ],
+        "min": [
+          -1.0,
+          -1.0,
+          -1.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          -1.0,
+          -0.9599999785423279,
+          -0.8714285492897034,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          1.0,
+          0.8600000143051147,
+          1.0,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "std": [
+          0.6367746591567993,
+          0.3788914680480957,
+          0.47796377539634705,
+          0.0,
+          0.0,
+          0.0,
+          0.4772108495235443
+        ]
+      },
+      "num_trajectories": 50,
+      "num_transitions": 34112,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "austin_sailor_dataset_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          1.0,
+          1.0,
+          1.0,
+          0.0,
+          0.0,
+          0.375,
+          1.0
+        ],
+        "mean": [
+          0.011825386434793472,
+          0.0064610871486365795,
+          0.060236409306526184,
+          0.0,
+          0.0,
+          0.0016465834341943264,
+          0.5260950326919556
+        ],
+        "min": [
+          -1.0,
+          -1.0,
+          -1.0,
+          0.0,
+          0.0,
+          -0.375,
+          0.0
+        ],
+        "q01": [
+          -1.0,
+          -0.9828571677207947,
+          -0.6000000238418579,
+          0.0,
+          0.0,
+          -0.17249999940395355,
+          0.0
+        ],
+        "q99": [
+          1.0,
+          0.9457142949104309,
+          1.0,
+          0.0,
+          0.0,
+          0.17892856895923615,
+          1.0
+        ],
+        "std": [
+          0.46348854899406433,
+          0.41240164637565613,
+          0.41186293959617615,
+          0.0,
+          0.0,
+          0.0578608438372612,
+          0.49893733859062195
+        ]
+      },
+      "num_trajectories": 240,
+      "num_transitions": 353094,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "austin_sirius_dataset_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          1.0002285242080688,
+          0.960608720779419,
+          1.105179786682129,
+          0.0,
+          0.0,
+          0.341785728931427,
+          1.0
+        ],
+        "mean": [
+          0.077476866543293,
+          0.031955525279045105,
+          0.04244735836982727,
+          0.0,
+          0.0,
+          -0.01603454165160656,
+          0.43260180950164795
+        ],
+        "min": [
+          -1.0183025598526,
+          -0.9800000190734863,
+          -0.9774575233459473,
+          0.0,
+          0.0,
+          -0.34607142210006714,
+          0.0
+        ],
+        "q01": [
+          -0.780905865430832,
+          -0.5667179036140442,
+          -0.5254343223571777,
+          0.0,
+          0.0,
+          -0.28495091378688814,
+          0.0
+        ],
+        "q99": [
+          0.9569637751579284,
+          0.6971374487876891,
+          0.8124888157844541,
+          0.0,
+          0.0,
+          0.1971428543329239,
+          1.0
+        ],
+        "std": [
+          0.3906330168247223,
+          0.2998153865337372,
+          0.2782270312309265,
+          0.0,
+          0.0,
+          0.08120641857385635,
+          0.49528202414512634
+        ]
+      },
+      "num_trajectories": 559,
+      "num_transitions": 279939,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "bc_z/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.2165454924106598,
+          0.1251407265663147,
+          0.10772687941789627,
+          0.33544227480888367,
+          0.28117990493774414,
+          0.40614867210388184,
+          1.0
+        ],
+        "mean": [
+          -0.009958645328879356,
+          0.0008958434336818755,
+          0.00499522453173995,
+          0.000297540333122015,
+          -0.008734511211514473,
+          -0.03068969026207924,
+          0.8344562649726868
+        ],
+        "min": [
+          -0.1677047461271286,
+          -0.14630407094955444,
+          -0.10066790133714676,
+          -0.29421567916870117,
+          -0.32101404666900635,
+          -0.4635624885559082,
+          0.0
+        ],
+        "q01": [
+          -0.09220654994249344,
+          -0.06456145539879798,
+          -0.049121275544166565,
+          -0.11594625547528267,
+          -0.14152548640966414,
+          -0.2251061636209488,
+          0.0
+        ],
+        "q99": [
+          0.07628866866230968,
+          0.058019736707210584,
+          0.052540797740221024,
+          0.11740604028105736,
+          0.11703975558280955,
+          0.16729306846857078,
+          1.0
+        ],
+        "std": [
+          0.030533093959093094,
+          0.0231416504830122,
+          0.020642085000872612,
+          0.04156165570020676,
+          0.04643021523952484,
+          0.07697845250368118,
+          0.36111101508140564
+        ]
+      },
+      "num_trajectories": 43264,
+      "num_transitions": 6015535,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "berkeley_autolab_ur5/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.019999999552965164,
+          0.019999999552965164,
+          0.019999999552965164,
+          0.06666667014360428,
+          0.06666667014360428,
+          0.06666667014360428,
+          1.0
+        ],
+        "mean": [
+          0.0005683613708242774,
+          0.0012176961172372103,
+          -0.0005296385497786105,
+          0.00021029777417425066,
+          6.069485243642703e-05,
+          0.0012049867073073983,
+          0.6298308372497559
+        ],
+        "min": [
+          -0.019999999552965164,
+          -0.019999999552965164,
+          -0.019999999552965164,
+          -0.06666667014360428,
+          -0.06666667014360428,
+          -0.06666667014360428,
+          0.0
+        ],
+        "q01": [
+          -0.019999999552965164,
+          -0.019999999552965164,
+          -0.019999999552965164,
+          -0.02628571353852749,
+          -0.06666667014360428,
+          -0.03847619146108627,
+          0.0
+        ],
+        "q99": [
+          0.019999999552965164,
+          0.019999999552965164,
+          0.019999999552965164,
+          0.031809523701667786,
+          0.06666667014360428,
+          0.036571428179740906,
+          1.0
+        ],
+        "std": [
+          0.011533073149621487,
+          0.007990497164428234,
+          0.009577799588441849,
+          0.009432999417185783,
+          0.016427574679255486,
+          0.011054049246013165,
+          0.482679545879364
+        ]
+      },
+      "num_trajectories": 1000,
+      "num_transitions": 97939,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "berkeley_cable_routing/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.9633283019065857,
+          1.0,
+          1.0,
+          0.0,
+          0.0,
+          1.0,
+          0.0
+        ],
+        "mean": [
+          -0.07139858603477478,
+          0.023608991876244545,
+          0.10241956263780594,
+          0.0,
+          0.0,
+          0.04967105761170387,
+          0.0
+        ],
+        "min": [
+          -0.9809081554412842,
+          -0.9554349184036255,
+          -0.9994775056838989,
+          0.0,
+          0.0,
+          -1.0,
+          0.0
+        ],
+        "q01": [
+          -0.5534318816661835,
+          -0.4797285574674606,
+          -0.5314934802055359,
+          0.0,
+          0.0,
+          -0.8855219376087189,
+          0.0
+        ],
+        "q99": [
+          0.42652835428714786,
+          0.5000944086909298,
+          0.639823433756829,
+          0.0,
+          0.0,
+          0.984243879914284,
+          0.0
+        ],
+        "std": [
+          0.18155010044574738,
+          0.18109896779060364,
+          0.21220752596855164,
+          0.0,
+          0.0,
+          0.3475516438484192,
+          0.0
+        ]
+      },
+      "num_trajectories": 1647,
+      "num_transitions": 42328,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "berkeley_fanuc_manipulation/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.009999999776482582,
+          0.009999999776482582,
+          0.009999999776482582,
+          0.03490658476948738,
+          0.03490658476948738,
+          0.03490658476948738,
+          1.0
+        ],
+        "mean": [
+          0.0007744057802483439,
+          -0.00031240080716088414,
+          -0.0015001941937953234,
+          -0.0007515158504247665,
+          -0.00015832878125365824,
+          0.00014327642566058785,
+          0.699295699596405
+        ],
+        "min": [
+          -0.009999999776482582,
+          -0.009999999776482582,
+          -0.009999999776482582,
+          -0.03490658476948738,
+          -0.03490658476948738,
+          -0.03490658476948738,
+          0.0
+        ],
+        "q01": [
+          -0.009999999776482582,
+          -0.009999999776482582,
+          -0.009999999776482582,
+          -0.03490658476948738,
+          0.0,
+          -0.03490658476948738,
+          0.0
+        ],
+        "q99": [
+          0.009999999776482582,
+          0.009999999776482582,
+          0.009999999776482582,
+          0.03490658476948738,
+          0.0,
+          0.03490658476948738,
+          1.0
+        ],
+        "std": [
+          0.0034070133697241545,
+          0.00499219074845314,
+          0.005344326142221689,
+          0.007599010597914457,
+          0.004081932827830315,
+          0.008568963967263699,
+          0.45868709683418274
+        ]
+      },
+      "num_trajectories": 415,
+      "num_transitions": 62613,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "bridge_orig/1.0.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.41691166162490845,
+          0.25864794850349426,
+          0.21218234300613403,
+          3.122201919555664,
+          1.8618112802505493,
+          6.280478477478027,
+          1.0
+        ],
+        "mean": [
+          0.00023341714404523373,
+          0.00013004327774979174,
+          -0.00012762591359205544,
+          -0.0001556579809403047,
+          -0.00040393328526988626,
+          0.00023558337124995887,
+          0.5764582753181458
+        ],
+        "min": [
+          -0.4007510244846344,
+          -0.13874775171279907,
+          -0.22553899884223938,
+          -3.2010786533355713,
+          -1.8618112802505493,
+          -6.279075622558594,
+          0.0
+        ],
+        "q01": [
+          -0.02872725307941437,
+          -0.04170349963009357,
+          -0.026093858778476715,
+          -0.08092105075716972,
+          -0.09288699507713317,
+          -0.20718276381492615,
+          0.0
+        ],
+        "q99": [
+          0.028309678435325586,
+          0.040855254605412394,
+          0.040161586627364146,
+          0.08192047759890528,
+          0.07792850524187081,
+          0.20382574498653397,
+          1.0
+        ],
+        "std": [
+          0.009765734896063805,
+          0.013689505867660046,
+          0.012667152099311352,
+          0.028534479439258575,
+          0.03063790127635002,
+          0.07691770792007446,
+          0.4973658621311188
+        ]
+      },
+      "num_trajectories": 60064,
+      "num_transitions": 2135463,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "cmu_stretch/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.02338407188653946,
+          0.0,
+          0.023404927924275398,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "mean": [
+          0.0003630445571616292,
+          0.0,
+          0.0016466928645968437,
+          0.0,
+          0.0,
+          0.0,
+          0.3987048268318176
+        ],
+        "min": [
+          -0.019353797659277916,
+          0.0,
+          -0.02019215188920498,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          -0.011175686959177256,
+          0.0,
+          -0.0032206363626755773,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.014501785952597848,
+          0.0,
+          0.015056106168776728,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "std": [
+          0.004081855062395334,
+          0.0,
+          0.003774340031668544,
+          0.0,
+          0.0,
+          0.0,
+          0.489638090133667
+        ]
+      },
+      "num_trajectories": 135,
+      "num_transitions": 25016,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "dlr_edan_shared_control_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.18991442024707794,
+          0.0739002525806427,
+          0.18064819276332855,
+          0.0866486132144928,
+          0.13464981317520142,
+          0.16910280287265778,
+          1.0
+        ],
+        "mean": [
+          0.0066478196531534195,
+          -0.0007657355745323002,
+          0.006522845011204481,
+          0.0011679773451760411,
+          -0.006395624950528145,
+          -0.011903021484613419,
+          0.6985887289047241
+        ],
+        "min": [
+          -0.10054297000169754,
+          -0.08427435159683228,
+          -0.13533438742160797,
+          -0.17556548118591309,
+          -0.18485672771930695,
+          -0.2680685818195343,
+          0.0
+        ],
+        "q01": [
+          -0.02987122368067503,
+          -0.06013262912631035,
+          -0.08286409199237824,
+          -0.05924444157630205,
+          -0.15986866518855095,
+          -0.15636983573436739,
+          0.0
+        ],
+        "q99": [
+          0.08832092039287087,
+          0.042126184627413736,
+          0.11311905644834042,
+          0.0643695573508739,
+          0.03941855944693088,
+          0.156646853685379,
+          1.0
+        ],
+        "std": [
+          0.021393585950136185,
+          0.018142299726605415,
+          0.03374377265572548,
+          0.01743541844189167,
+          0.03394372761249542,
+          0.04641878604888916,
+          0.45885783433914185
+        ]
+      },
+      "num_trajectories": 104,
+      "num_transitions": 8928,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "dobbe/0.0.1": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          38.590423583984375,
+          17.932697296142578,
+          4.843764305114746,
+          1.4372116327285767,
+          0.4340403974056244,
+          1.2057193517684937,
+          0.9998947381973267
+        ],
+        "mean": [
+          -0.00011206958151888102,
+          0.0011229681549593806,
+          -0.00010193959315074608,
+          -7.37128357286565e-05,
+          -0.0006753374473191798,
+          -5.664441778208129e-05,
+          0.6318688988685608
+        ],
+        "min": [
+          -5.700923442840576,
+          -21.605947494506836,
+          -123.72489929199219,
+          -1.7229845523834229,
+          -0.4998578727245331,
+          -0.8867913484573364,
+          1.4196479014572105e-06
+        ],
+        "q01": [
+          -0.01119564864784479,
+          -0.014266146533191203,
+          -0.0071747214533388615,
+          -0.009444301575422287,
+          -0.03990109823644161,
+          -0.017422311007976532,
+          4.003279136668425e-05
+        ],
+        "q99": [
+          0.01015154086053368,
+          0.017181577533483497,
+          0.007216989761218411,
+          0.010380979906767595,
+          0.03556173853576176,
+          0.018032474815845446,
+          0.9982578039169312
+        ],
+        "std": [
+          0.042660679668188095,
+          0.04428431764245033,
+          0.12224890291690826,
+          0.005388470832258463,
+          0.011246936395764351,
+          0.006288259290158749,
+          0.3973240256309509
+        ]
+      },
+      "num_trajectories": 5208,
+      "num_transitions": 1139911,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "droid/1.0.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.9999998211860657,
+          0.999991774559021,
+          0.9999973177909851,
+          0.9999874830245972,
+          0.9999954104423523,
+          0.9999998807907104,
+          1.0
+        ],
+        "mean": [
+          0.027425529435276985,
+          -0.0026820411439985037,
+          0.01595238223671913,
+          0.0035501928068697453,
+          -0.030532635748386383,
+          -0.006685464642941952,
+          0.5860344171524048
+        ],
+        "min": [
+          -0.9999999403953552,
+          -0.9999951124191284,
+          -0.9999960660934448,
+          -0.9999980330467224,
+          -0.9999982118606567,
+          -0.9999998807907104,
+          0.0
+        ],
+        "q01": [
+          -0.7776297926902771,
+          -0.5803514122962952,
+          -0.5795090794563293,
+          -0.6464047729969025,
+          -0.7041108310222626,
+          -0.8895104378461838,
+          0.0
+        ],
+        "q99": [
+          0.7597932070493698,
+          0.5726242214441299,
+          0.7351000607013702,
+          0.6705610305070877,
+          0.6464948207139969,
+          0.8897542208433151,
+          1.0
+        ],
+        "std": [
+          0.25387412309646606,
+          0.18426834046840668,
+          0.22532416880130768,
+          0.21757009625434875,
+          0.22572560608386993,
+          0.2867794930934906,
+          0.4287726879119873
+        ]
+      },
+      "num_trajectories": 92233,
+      "num_transitions": 27044326,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "fmb_dataset/1.0.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          1.399999976158142,
+          1.0,
+          1.399999976158142,
+          1.0,
+          1.0,
+          1.0,
+          1.0
+        ],
+        "mean": [
+          0.05902976542711258,
+          -0.06476633995771408,
+          -0.09787469357252121,
+          0.004325387068092823,
+          0.00028963759541511536,
+          -0.04457257315516472,
+          0.7336440086364746
+        ],
+        "min": [
+          -1.399999976158142,
+          -1.399999976158142,
+          -1.0,
+          -1.0,
+          -1.0,
+          -1.0,
+          0.0
+        ],
+        "q01": [
+          -0.8257142901420593,
+          -1.399999976158142,
+          -1.0,
+          -1.0,
+          -0.3028571307659149,
+          -1.0,
+          0.0
+        ],
+        "q99": [
+          1.0,
+          0.5257142782211304,
+          1.0,
+          1.0,
+          0.3400000035762787,
+          1.0,
+          1.0
+        ],
+        "std": [
+          0.28809186816215515,
+          0.2820416986942291,
+          0.4626740515232086,
+          0.3266514539718628,
+          0.10842999070882797,
+          0.34400978684425354,
+          0.4435289800167084
+        ]
+      },
+      "num_trajectories": 8612,
+      "num_transitions": 1137459,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "fractal20220817_data/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          2.9984593391418457,
+          22.09052848815918,
+          2.7507524490356445,
+          1.570636510848999,
+          1.5321086645126343,
+          1.5691522359848022,
+          1.0
+        ],
+        "mean": [
+          0.006987507455050945,
+          0.006265853065997362,
+          -0.012625162489712238,
+          0.04333285242319107,
+          -0.005756276659667492,
+          0.0009130403632298112,
+          0.5354204773902893
+        ],
+        "min": [
+          -2.0204520225524902,
+          -5.497899532318115,
+          -2.031663417816162,
+          -1.569917917251587,
+          -1.569892168045044,
+          -1.570419430732727,
+          0.0
+        ],
+        "q01": [
+          -0.22453527510166169,
+          -0.14820013284683228,
+          -0.231589707583189,
+          -0.3517994859814644,
+          -0.4193011274933815,
+          -0.43643461108207704,
+          0.0
+        ],
+        "q99": [
+          0.17824687153100965,
+          0.14938379630446405,
+          0.21842354819178575,
+          0.5892666035890578,
+          0.35272657424211445,
+          0.44796681255102094,
+          1.0
+        ],
+        "std": [
+          0.06921109557151794,
+          0.05970889702439308,
+          0.0735311210155487,
+          0.1561058759689331,
+          0.1316441297531128,
+          0.14593777060508728,
+          0.49711623787879944
+        ]
+      },
+      "num_trajectories": 87212,
+      "num_transitions": 3786400,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "furniture_bench_dataset_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.10000000149011612,
+          0.10000000149011612,
+          0.10000000149011612,
+          0.8651833534240723,
+          1.0909736156463623,
+          2.863185405731201,
+          1.0
+        ],
+        "mean": [
+          0.0001461071806261316,
+          0.0010830992832779884,
+          0.0006224963581189513,
+          -0.0033032014034688473,
+          -0.002688060747459531,
+          0.018242614343762398,
+          0.48854944109916687
+        ],
+        "min": [
+          -0.10495579987764359,
+          -0.10939455777406693,
+          -0.10000000149011612,
+          -0.971906840801239,
+          -1.0475432872772217,
+          -3.06000018119812,
+          0.0
+        ],
+        "q01": [
+          -0.053988199681043625,
+          -0.05049169331789017,
+          -0.032499241530895236,
+          -0.1953887003660202,
+          -0.41674559473991396,
+          -0.8886768388748169,
+          0.0
+        ],
+        "q99": [
+          0.05414841488003723,
+          0.04965164884924884,
+          0.060055799782276154,
+          0.18231668293476103,
+          0.39867786407470646,
+          0.8772023963928218,
+          1.0
+        ],
+        "std": [
+          0.016107233241200447,
+          0.014891570433974266,
+          0.014014236629009247,
+          0.05827433615922928,
+          0.11417083442211151,
+          0.33479660749435425,
+          0.4999157190322876
+        ]
+      },
+      "num_trajectories": 5100,
+      "num_transitions": 3948057,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "iamlab_cmu_pickup_insert_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.6634981632232666,
+          0.23428471386432648,
+          0.4308285415172577,
+          3.1415927410125732,
+          0.13647015392780304,
+          3.141592502593994,
+          1.0
+        ],
+        "mean": [
+          0.5274373292922974,
+          0.028582017868757248,
+          0.18712472915649414,
+          1.2339569330215454,
+          0.03226622939109802,
+          -1.4199472665786743,
+          0.5550631880760193
+        ],
+        "min": [
+          0.3071657121181488,
+          -0.29754969477653503,
+          0.06578229367733002,
+          -3.1415927410125732,
+          -0.04584203287959099,
+          -3.141592502593994,
+          0.0
+        ],
+        "q01": [
+          0.3148897051811218,
+          -0.20317550599575043,
+          0.06785467118024827,
+          -3.140952730178833,
+          -0.029743434861302376,
+          -3.141091251373291,
+          0.0
+        ],
+        "q99": [
+          0.6472805738449097,
+          0.20846802592277527,
+          0.36855655312538155,
+          3.1409926891326903,
+          0.11424950212240226,
+          3.1410969257354737,
+          1.0
+        ],
+        "std": [
+          0.08108346909284592,
+          0.1116756722331047,
+          0.07747555524110794,
+          2.8737244606018066,
+          0.02774704433977604,
+          2.7678685188293457,
+          0.4969509243965149
+        ]
+      },
+      "num_trajectories": 631,
+      "num_transitions": 146241,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "jaco_play/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.20000000298023224,
+          0.20000000298023224,
+          0.20000000298023224,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "mean": [
+          0.0009658387862145901,
+          -0.005800850689411163,
+          -0.003950685728341341,
+          0.0,
+          0.0,
+          0.0,
+          0.34934908151626587
+        ],
+        "min": [
+          -0.20000000298023224,
+          -0.20000000298023224,
+          -0.20000000298023224,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          -0.20000000298023224,
+          -0.20000000298023224,
+          -0.20000000298023224,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.20000000298023224,
+          0.20000000298023224,
+          0.20000000298023224,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "std": [
+          0.12234985828399658,
+          0.09678783267736435,
+          0.1115543395280838,
+          0.0,
+          0.0,
+          0.0,
+          0.47682321071624756
+        ]
+      },
+      "num_trajectories": 1085,
+      "num_transitions": 77965,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "kuka/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.1697135865688324,
+          0.2777623236179352,
+          0.43710532784461975,
+          0.0,
+          0.0,
+          1.9684287309646606,
+          1.0
+        ],
+        "mean": [
+          -0.00046687963185831904,
+          0.00040137648466043174,
+          -0.0012807906605303288,
+          0.0,
+          0.0,
+          -0.037225183099508286,
+          0.4131543040275574
+        ],
+        "min": [
+          -0.159867063164711,
+          -0.2892282009124756,
+          -0.2795473635196686,
+          0.0,
+          0.0,
+          -1.9875637292861938,
+          0.0
+        ],
+        "q01": [
+          -0.06619441494345665,
+          -0.08713878810405731,
+          -0.15083016991615295,
+          0.0,
+          0.0,
+          -0.5415697038173676,
+          0.0
+        ],
+        "q99": [
+          0.06601839080452929,
+          0.08732476785779003,
+          0.18168179214000715,
+          0.0,
+          0.0,
+          0.2923380345106127,
+          1.0
+        ],
+        "std": [
+          0.020832739770412445,
+          0.029158642515540123,
+          0.0642285868525505,
+          0.0,
+          0.0,
+          0.14224639534950256,
+          0.4908643662929535
+        ]
+      },
+      "num_trajectories": 209880,
+      "num_transitions": 2455879,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "language_table/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.23357294499874115,
+          0.24496802687644958,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "mean": [
+          0.00014891766477376223,
+          -0.0005636657006107271,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "min": [
+          -0.21989956498146057,
+          -0.23736150562763214,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "q01": [
+          -0.08179590478539467,
+          -0.11795833334326744,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "q99": [
+          0.08822273463010788,
+          0.1191693339496851,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          1.0
+        ],
+        "std": [
+          0.030162859708070755,
+          0.04230763390660286,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      },
+      "num_trajectories": 442226,
+      "num_transitions": 7045476,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "libero_spatial_no_noops/1.0.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.9375,
+          0.9375,
+          0.9375,
+          0.1971428543329239,
+          0.33642858266830444,
+          0.375,
+          1.0
+        ],
+        "mean": [
+          0.1531248390674591,
+          0.13707281649112701,
+          -0.1552678644657135,
+          -0.005176451988518238,
+          -0.01120878104120493,
+          -0.020194314420223236,
+          0.4578818082809448
+        ],
+        "min": [
+          -0.9375,
+          -0.9375,
+          -0.9375,
+          -0.1875,
+          -0.3675000071525574,
+          -0.36000001430511475,
+          0.0
+        ],
+        "q01": [
+          -0.7454732114076613,
+          -0.6616071462631226,
+          -0.9375,
+          -0.1071428582072258,
+          -0.20678570866584778,
+          -0.1842857152223587,
+          0.0
+        ],
+        "q99": [
+          0.9375,
+          0.8758928775787354,
+          0.9321428537368774,
+          0.1039285734295845,
+          0.17678570747375488,
+          0.14571428298950195,
+          1.0
+        ],
+        "std": [
+          0.4127269387245178,
+          0.34724509716033936,
+          0.5086917877197266,
+          0.03726619482040405,
+          0.07244456559419632,
+          0.05762358754873276,
+          0.49828025698661804
+        ]
+      },
+      "num_trajectories": 432,
+      "num_transitions": 52970,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "nyu_franka_play_dataset_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.06424188613891602,
+          0.07027634978294373,
+          0.06129661202430725,
+          6.281067848205566,
+          0.1967729926109314,
+          0.26377415657043457,
+          1.0
+        ],
+        "mean": [
+          0.0010219910182058811,
+          -0.00012002632865915075,
+          0.00032894135802052915,
+          0.0015034276293590665,
+          -0.002198528265580535,
+          -0.0016632305923849344,
+          0.7230083346366882
+        ],
+        "min": [
+          -0.05952230095863342,
+          -0.07232445478439331,
+          -0.06730806827545166,
+          -6.278434753417969,
+          -0.21479034423828125,
+          -0.3627619743347168,
+          0.0
+        ],
+        "q01": [
+          -0.03199600875377655,
+          -0.032861671447753905,
+          -0.03368805110454559,
+          -0.12080862045288086,
+          -0.12175218224525451,
+          -0.11370223641395569,
+          0.0
+        ],
+        "q99": [
+          0.03101520001888276,
+          0.0373908892273903,
+          0.03646374464035038,
+          0.11764093399047852,
+          0.1258920183777809,
+          0.09366151213645942,
+          1.0
+        ],
+        "std": [
+          0.013274150900542736,
+          0.013215919025242329,
+          0.01282210648059845,
+          0.27324533462524414,
+          0.05702253058552742,
+          0.03917279839515686,
+          0.44753193855285645
+        ]
+      },
+      "num_trajectories": 456,
+      "num_transitions": 44875,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "rh20t_rlds/1.0.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          7.582831568163597e+35,
+          7.557172735451728e+35,
+          2.2717764477020827e+27,
+          3.1415927410125732,
+          1.5116956233978271,
+          3.1415927410125732,
+          1.0
+        ],
+        "mean": [
+          -5.332157638779582e+28,
+          -1.5128827327837974e+29,
+          -1.832736619079747e+28,
+          0.5735913515090942,
+          -0.00847744569182396,
+          -0.5566052198410034,
+          0.3186892569065094
+        ],
+        "min": [
+          -3.5543094244408723e+36,
+          -8.723098019507117e+36,
+          -9.648338287048974e+35,
+          -3.1415927410125732,
+          -1.5062522888183594,
+          -3.1415927410125732,
+          0.0
+        ],
+        "q01": [
+          0.36028257966041566,
+          -0.272584410905838,
+          0.005985925104469062,
+          -3.1411514282226562,
+          -0.5925320792198181,
+          -3.1415159702301025,
+          0.0
+        ],
+        "q99": [
+          0.7534684538841248,
+          0.31738221645355225,
+          0.33061375379562374,
+          3.141425132751465,
+          0.47507260441780086,
+          3.141479730606079,
+          1.0
+        ],
+        "std": [
+          Infinity,
+          Infinity,
+          Infinity,
+          2.2581026554107666,
+          0.1548534482717514,
+          2.2581026554107666,
+          0.39917993545532227
+        ]
+      },
+      "num_trajectories": 104392,
+      "num_transitions": 52644433,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "roboturk/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.39124172925949097,
+          0.4601028263568878,
+          0.4870833456516266,
+          1.816888689994812,
+          1.8240282535552979,
+          1.4824820756912231,
+          1.0
+        ],
+        "mean": [
+          0.001444889116100967,
+          -0.0015945355407893658,
+          -0.0011753803119063377,
+          0.002301239175722003,
+          -0.0009382442804053426,
+          -0.00011485860886750743,
+          0.5746025443077087
+        ],
+        "min": [
+          -0.6546999216079712,
+          -0.6365841031074524,
+          -0.4217723608016968,
+          -1.6695482730865479,
+          -1.8023357391357422,
+          -1.4630827903747559,
+          0.0
+        ],
+        "q01": [
+          -0.1342635464668274,
+          -0.19996687173843383,
+          -0.1482972100377083,
+          -0.20720748245716095,
+          -0.09676413893699647,
+          -0.18075634717941286,
+          0.0
+        ],
+        "q99": [
+          0.14956976801157001,
+          0.1805950567126275,
+          0.18841815620660796,
+          0.21615413755178453,
+          0.09457383215427405,
+          0.18543301910162005,
+          1.0
+        ],
+        "std": [
+          0.0493537075817585,
+          0.06354564428329468,
+          0.06116492301225662,
+          0.0955340564250946,
+          0.08420011401176453,
+          0.06517910957336426,
+          0.4945177137851715
+        ]
+      },
+      "num_trajectories": 1995,
+      "num_transitions": 187507,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "stanford_hydra_dataset_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.02499854564666748,
+          0.02499903365969658,
+          0.024999922141432762,
+          0.24974457919597626,
+          0.24997030198574066,
+          0.24999946355819702,
+          1.0
+        ],
+        "mean": [
+          0.0007790043600834906,
+          0.00013707877951674163,
+          -0.000254859565757215,
+          0.0012903243768960238,
+          -0.004751724191009998,
+          0.002692892448976636,
+          0.48855218291282654
+        ],
+        "min": [
+          -0.024999044835567474,
+          -0.024999700486660004,
+          -0.02499929815530777,
+          -0.24993225932121277,
+          -0.2499666064977646,
+          -0.2499932497739792,
+          0.0
+        ],
+        "q01": [
+          -0.019992006458342076,
+          -0.02415412735193968,
+          -0.022941758055239916,
+          -0.11085530579090118,
+          -0.12024572037160397,
+          -0.13314770206809043,
+          0.0
+        ],
+        "q99": [
+          0.022886231057345868,
+          0.022358838934451335,
+          0.02410089675337076,
+          0.12370114490389822,
+          0.11323311634361738,
+          0.18474749639630164,
+          1.0
+        ],
+        "std": [
+          0.008022183552384377,
+          0.009131456725299358,
+          0.00957438349723816,
+          0.04122224077582359,
+          0.03843001648783684,
+          0.046067025512456894,
+          0.49978113174438477
+        ]
+      },
+      "num_trajectories": 570,
+      "num_transitions": 358234,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "taco_play/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          1.4915844202041626,
+          2.1842432022094727,
+          2.6836395263671875,
+          5.035226821899414,
+          2.665864944458008,
+          4.250768661499023,
+          1.0
+        ],
+        "mean": [
+          -0.0038459226489067078,
+          0.009671436622738838,
+          0.01278059184551239,
+          -0.0054037850350141525,
+          -0.009606562554836273,
+          -0.0024807206355035305,
+          0.4263913035392761
+        ],
+        "min": [
+          -4.242457866668701,
+          -3.192805051803589,
+          -1.3371467590332031,
+          -4.202683448791504,
+          -2.6722638607025146,
+          -3.3467135429382324,
+          0.0
+        ],
+        "q01": [
+          -0.7106140398979186,
+          -1.056944659948349,
+          -0.5878450274467468,
+          -0.7682853937149048,
+          -0.7180147767066956,
+          -1.5527938604354858,
+          0.0
+        ],
+        "q99": [
+          0.6482916426658629,
+          1.0051310062408447,
+          0.9480248689651489,
+          0.6926478147506714,
+          0.6351067513227462,
+          1.628010264635086,
+          1.0
+        ],
+        "std": [
+          0.23254045844078064,
+          0.3629826307296753,
+          0.2869291603565216,
+          0.261770635843277,
+          0.24388927221298218,
+          0.5216501355171204,
+          0.49469029903411865
+        ]
+      },
+      "num_trajectories": 3603,
+      "num_transitions": 237798,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "toto/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          0.6839867234230042,
+          0.4454185664653778,
+          0.7984078526496887,
+          2.120781660079956,
+          1.371164321899414,
+          1.4118704795837402,
+          0.0
+        ],
+        "mean": [
+          0.3854214549064636,
+          0.007769507821649313,
+          0.3632742166519165,
+          -0.665202796459198,
+          0.1890396624803543,
+          0.0329875648021698,
+          0.0
+        ],
+        "min": [
+          0.09922284632921219,
+          -0.5180193781852722,
+          0.13791072368621826,
+          -2.635117530822754,
+          -1.0734480619430542,
+          -1.9282547235488892,
+          0.0
+        ],
+        "q01": [
+          0.1756722891330719,
+          -0.3077590811252594,
+          0.235383919775486,
+          -2.0908505964279174,
+          -0.6191593289375306,
+          -0.7488683319091797,
+          0.0
+        ],
+        "q99": [
+          0.6136963081359863,
+          0.33704194784164443,
+          0.6681221985816956,
+          0.7422861719131538,
+          0.7955395007133507,
+          0.740464625358582,
+          0.0
+        ],
+        "std": [
+          0.12211630493402481,
+          0.19378569722175598,
+          0.10178232192993164,
+          0.5725256204605103,
+          0.298846036195755,
+          0.32599160075187683,
+          0.0
+        ]
+      },
+      "num_trajectories": 1003,
+      "num_transitions": 325699,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "ucsd_kitchen_dataset_converted_externally_to_rlds/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          678.0,
+          400.0,
+          507.0,
+          180.00001525878906,
+          6.000013828277588,
+          116.99998474121094,
+          1.0
+        ],
+        "mean": [
+          410.375732421875,
+          116.9518814086914,
+          192.35031127929688,
+          -121.22441864013672,
+          -33.84892654418945,
+          50.016136169433594,
+          0.741813600063324
+        ],
+        "min": [
+          172.0,
+          -166.0,
+          -99.99999237060547,
+          -180.00001525878906,
+          -89.0,
+          -96.00010681152344,
+          0.0
+        ],
+        "q01": [
+          200.00001052856445,
+          -102.31004211425781,
+          -94.99993370056153,
+          -180.00001525878906,
+          -88.00001525878906,
+          -38.999977111816406,
+          0.0
+        ],
+        "q99": [
+          637.0,
+          368.30999999999995,
+          493.0,
+          180.00001525878906,
+          0.999983012676239,
+          105.00001525878906,
+          1.0
+        ],
+        "std": [
+          122.81488037109375,
+          108.80094909667969,
+          130.30345153808594,
+          116.2820053100586,
+          27.62191390991211,
+          41.02091979980469,
+          0.4376337230205536
+        ]
+      },
+      "num_trajectories": 150,
+      "num_transitions": 3970,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "utaustin_mutex/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          1.0,
+          1.0,
+          1.0,
+          0.375,
+          0.375,
+          0.375,
+          1.0
+        ],
+        "mean": [
+          0.06176406517624855,
+          -0.005005490034818649,
+          0.10216782987117767,
+          -0.03314131125807762,
+          0.013895022682845592,
+          -0.011317633092403412,
+          0.5038976669311523
+        ],
+        "min": [
+          -1.0,
+          -1.0,
+          -1.0,
+          -0.375,
+          -0.375,
+          -0.375,
+          0.0
+        ],
+        "q01": [
+          -0.4285714328289032,
+          -0.9800000190734863,
+          -0.5571428537368774,
+          -0.375,
+          -0.15642857551574707,
+          -0.335357129573822,
+          0.0
+        ],
+        "q99": [
+          0.5914285778999329,
+          0.9714285731315613,
+          1.0,
+          0.3278571367263794,
+          0.207857146859169,
+          0.25607141852378845,
+          1.0
+        ],
+        "std": [
+          0.187501460313797,
+          0.4468473196029663,
+          0.3792876601219177,
+          0.14097853004932404,
+          0.06453699618577957,
+          0.11765265464782715,
+          0.501045286655426
+        ]
+      },
+      "num_trajectories": 1500,
+      "num_transitions": 361883,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    },
+    "viola/0.1.0": {
+      "action": {
+        "mask": [
+          true,
+          true,
+          true,
+          true,
+          true,
+          true,
+          false
+        ],
+        "max": [
+          1.0,
+          1.0,
+          1.0,
+          0.375,
+          0.36321428418159485,
+          0.375,
+          1.0
+        ],
+        "mean": [
+          0.04761853069067001,
+          -0.029204534366726875,
+          0.055867329239845276,
+          -0.0026185200549662113,
+          0.006867341697216034,
+          -0.016821356490254402,
+          0.7323777675628662
+        ],
+        "min": [
+          -1.0,
+          -1.0,
+          -1.0,
+          -0.375,
+          -0.375,
+          -0.375,
+          0.0
+        ],
+        "q01": [
+          -0.9628571271896362,
+          -1.0,
+          -1.0,
+          -0.26249998807907104,
+          -0.21321429312229156,
+          -0.3385714292526245,
+          0.0
+        ],
+        "q99": [
+          0.9114285707473755,
+          0.868571400642395,
+          1.0,
+          0.2817857265472412,
+          0.2239285707473755,
+          0.3557142913341522,
+          1.0
+        ],
+        "std": [
+          0.39157867431640625,
+          0.40765219926834106,
+          0.40077903866767883,
+          0.10023998469114304,
+          0.08443189412355423,
+          0.10375089943408966,
+          0.442600816488266
+        ]
+      },
+      "num_trajectories": 150,
+      "num_transitions": 76324,
+      "proprio": {
+        "max": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "mean": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "min": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q01": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "q99": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ],
+        "std": [
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0,
+          0.0
+        ]
+      }
+    }
+  }
+}

checkpoint-4500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:377bba4ecd420d5772191432f55bb25bbc2610e2f29bc227a2b1705b6a07e689
+size 14498

checkpoint-4500/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": {
+    "content": "<bos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<eos>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-4500/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2523a63c898ebf0a32c7282a2e459ef2c950a846c5f3172305089e4149b6b6c3
+size 36157680

checkpoint-4500/tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-4500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1bca31d7cf81339398135aee03017c25c631f57410fadacdd38c77df157ba167
+size 17287214

checkpoint-4500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:692a6960b2b8371501eeed1a2dfdbbe2c6a5727c188813ef7d5e38877285c804
+size 7416

checkpoint-4500/zero_to_fp32.py ADDED Viewed

	@@ -0,0 +1,674 @@

+#!/usr/bin/env python
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0
+# DeepSpeed Team
+# This script extracts fp32 consolidated weights from a zero 1, 2 and 3 DeepSpeed checkpoints. It gets
+# copied into the top level checkpoint dir, so the user can easily do the conversion at any point in
+# the future. Once extracted, the weights don't require DeepSpeed and can be used in any
+# application.
+#
+# example:
+#   python zero_to_fp32.py . output_dir/
+#   or
+#   python zero_to_fp32.py . output_dir/ --safe_serialization
+import argparse
+import torch
+import glob
+import math
+import os
+import re
+import json
+from tqdm import tqdm
+from collections import OrderedDict
+from dataclasses import dataclass
+# while this script doesn't use deepspeed to recover data, since the checkpoints are pickled with
+# DeepSpeed data structures it has to be available in the current python environment.
+from deepspeed.utils import logger
+from deepspeed.checkpoint.constants import (DS_VERSION, OPTIMIZER_STATE_DICT, SINGLE_PARTITION_OF_FP32_GROUPS,
+                                            FP32_FLAT_GROUPS, ZERO_STAGE, PARTITION_COUNT, PARAM_SHAPES, BUFFER_NAMES,
+                                            FROZEN_PARAM_SHAPES, FROZEN_PARAM_FRAGMENTS)
+@dataclass
+class zero_model_state:
+    buffers: dict()
+    param_shapes: dict()
+    shared_params: list
+    ds_version: int
+    frozen_param_shapes: dict()
+    frozen_param_fragments: dict()
+# set to a truthy value to make the functions in this file print extra diagnostics
+debug = 0
+# checkpoint files are loaded onto the cpu (passed as ``map_location`` to ``torch.load``)
+device = torch.device('cpu')
+def atoi(text):
+    return int(text) if text.isdigit() else text
+def natural_keys(text):
+    '''
+    alist.sort(key=natural_keys) sorts in human order
+    http://nedbatchelder.com/blog/200712/human_sorting.html
+    (See Toothy's implementation in the comments)
+    '''
+    return [atoi(c) for c in re.split(r'(\d+)', text)]
+def get_model_state_file(checkpoint_dir, zero_stage):
+    if not os.path.isdir(checkpoint_dir):
+        raise FileNotFoundError(f"Directory '{checkpoint_dir}' doesn't exist")
+    # there should be only one file
+    if zero_stage <= 2:
+        file = os.path.join(checkpoint_dir, "mp_rank_00_model_states.pt")
+    elif zero_stage == 3:
+        file = os.path.join(checkpoint_dir, "zero_pp_rank_0_mp_rank_00_model_states.pt")
+    if not os.path.exists(file):
+        raise FileNotFoundError(f"can't find model states file at '{file}'")
+    return file
+def get_checkpoint_files(checkpoint_dir, glob_pattern):
+    # XXX: need to test that this simple glob rule works for multi-node setup too
+    ckpt_files = sorted(glob.glob(os.path.join(checkpoint_dir, glob_pattern)), key=natural_keys)
+    if len(ckpt_files) == 0:
+        raise FileNotFoundError(f"can't find {glob_pattern} files in directory '{checkpoint_dir}'")
+    return ckpt_files
+def get_optim_files(checkpoint_dir):
+    return get_checkpoint_files(checkpoint_dir, "*_optim_states.pt")
+def get_model_state_files(checkpoint_dir):
+    return get_checkpoint_files(checkpoint_dir, "*_model_states.pt")
+def parse_model_states(files):
+    zero_model_states = []
+    for file in files:
+        state_dict = torch.load(file, map_location=device)
+        if BUFFER_NAMES not in state_dict:
+            raise ValueError(f"{file} is not a model state checkpoint")
+        buffer_names = state_dict[BUFFER_NAMES]
+        if debug:
+            print("Found buffers:", buffer_names)
+        # recover just the buffers while restoring them to fp32 if they were saved in fp16
+        buffers = {k: v.float() for k, v in state_dict["module"].items() if k in buffer_names}
+        param_shapes = state_dict[PARAM_SHAPES]
+        # collect parameters that are included in param_shapes
+        param_names = []
+        for s in param_shapes:
+            for name in s.keys():
+                param_names.append(name)
+        # update with frozen parameters
+        frozen_param_shapes = state_dict.get(FROZEN_PARAM_SHAPES, None)
+        if frozen_param_shapes is not None:
+            if debug:
+                print(f"Found frozen_param_shapes: {frozen_param_shapes}")
+            param_names += list(frozen_param_shapes.keys())
+        # handle shared params
+        shared_params = [[k, v] for k, v in state_dict["shared_params"].items()]
+        ds_version = state_dict.get(DS_VERSION, None)
+        frozen_param_fragments = state_dict.get(FROZEN_PARAM_FRAGMENTS, None)
+        z_model_state = zero_model_state(buffers=buffers,
+                                         param_shapes=param_shapes,
+                                         shared_params=shared_params,
+                                         ds_version=ds_version,
+                                         frozen_param_shapes=frozen_param_shapes,
+                                         frozen_param_fragments=frozen_param_fragments)
+        zero_model_states.append(z_model_state)
+    return zero_model_states
+def parse_optim_states(files, ds_checkpoint_dir):
+    total_files = len(files)
+    state_dicts = []
+    for f in files:
+        state_dict = torch.load(f, map_location=device)
+        # immediately discard the potentially huge 2 optimizer states as we only care for fp32 master weights
+        # and also handle the case where it was already removed by another helper script
+        state_dict["optimizer_state_dict"].pop("optimizer_state_dict", None)
+        state_dicts.append(state_dict)
+    if not ZERO_STAGE in state_dicts[0][OPTIMIZER_STATE_DICT]:
+        raise ValueError(f"{files[0]} is not a zero checkpoint")
+    zero_stage = state_dicts[0][OPTIMIZER_STATE_DICT][ZERO_STAGE]
+    world_size = state_dicts[0][OPTIMIZER_STATE_DICT][PARTITION_COUNT]
+    # For ZeRO-2 each param group can have different partition_count as data parallelism for expert
+    # parameters can be different from data parallelism for non-expert parameters. So we can just
+    # use the max of the partition_count to get the dp world_size.
+    if type(world_size) is list:
+        world_size = max(world_size)
+    if world_size != total_files:
+        raise ValueError(
+            f"Expected {world_size} of '*_optim_states.pt' under '{ds_checkpoint_dir}' but found {total_files} files. "
+            "Possibly due to an overwrite of an old checkpoint, or a checkpoint didn't get saved by one or more processes."
+        )
+    # the groups are named differently in each stage
+    if zero_stage <= 2:
+        fp32_groups_key = SINGLE_PARTITION_OF_FP32_GROUPS
+    elif zero_stage == 3:
+        fp32_groups_key = FP32_FLAT_GROUPS
+    else:
+        raise ValueError(f"unknown zero stage {zero_stage}")
+    if zero_stage <= 2:
+        fp32_flat_groups = [state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key] for i in range(len(state_dicts))]
+    elif zero_stage == 3:
+        # if there is more than one param group, there will be multiple flattened tensors - one
+        # flattened tensor per group - for simplicity merge them into a single tensor
+        #
+        # XXX: could make the script more memory efficient for when there are multiple groups - it
+        # will require matching the sub-lists of param_shapes for each param group flattened tensor
+        fp32_flat_groups = [
+            torch.cat(state_dicts[i][OPTIMIZER_STATE_DICT][fp32_groups_key], 0) for i in range(len(state_dicts))
+        ]
+    return zero_stage, world_size, fp32_flat_groups
+def _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters):
+    """
+    Returns fp32 state_dict reconstructed from ds checkpoint
+    Args:
+        - ``ds_checkpoint_dir``: path to the deepspeed checkpoint folder (where the optimizer files are)
+    """
+    print(f"Processing zero checkpoint '{ds_checkpoint_dir}'")
+    optim_files = get_optim_files(ds_checkpoint_dir)
+    zero_stage, world_size, fp32_flat_groups = parse_optim_states(optim_files, ds_checkpoint_dir)
+    print(f"Detected checkpoint of type zero stage {zero_stage}, world_size: {world_size}")
+    model_files = get_model_state_files(ds_checkpoint_dir)
+    zero_model_states = parse_model_states(model_files)
+    print(f'Parsing checkpoint created by deepspeed=={zero_model_states[0].ds_version}')
+    if zero_stage <= 2:
+        return _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                          exclude_frozen_parameters)
+    elif zero_stage == 3:
+        return _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
+                                                          exclude_frozen_parameters)
+def _zero2_merge_frozen_params(state_dict, zero_model_states):
+    if zero_model_states[0].frozen_param_shapes is None or len(zero_model_states[0].frozen_param_shapes) == 0:
+        return
+    frozen_param_shapes = zero_model_states[0].frozen_param_shapes
+    frozen_param_fragments = zero_model_states[0].frozen_param_fragments
+    if debug:
+        num_elem = sum(s.numel() for s in frozen_param_shapes.values())
+        print(f'rank 0: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')
+        wanted_params = len(frozen_param_shapes)
+        wanted_numel = sum(s.numel() for s in frozen_param_shapes.values())
+        avail_numel = sum([p.numel() for p in frozen_param_fragments.values()])
+        print(f'Frozen params: Have {avail_numel} numels to process.')
+        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')
+    total_params = 0
+    total_numel = 0
+    for name, shape in frozen_param_shapes.items():
+        total_params += 1
+        unpartitioned_numel = shape.numel()
+        total_numel += unpartitioned_numel
+        state_dict[name] = frozen_param_fragments[name]
+        if debug:
+            print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
+def _has_callable(obj, fn):
+    attr = getattr(obj, fn, None)
+    return callable(attr)
+def _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
+    """
+    Rebuild the trainable parameters from the per-rank fp32 partitions and store them in ``state_dict``.
+    Args:
+        - ``state_dict``: mapping updated in place with ``name -> tensor`` entries
+        - ``world_size``: number of ranks; also determines the alignment used by the final sanity check
+        - ``fp32_flat_groups``: indexed as ``[rank][param_group]``, each entry a flat tensor
+        - ``zero_model_states``: parsed model states; only the first entry's ``param_shapes`` is read
+    Raises:
+        - ``ValueError``: after alignment the consumed element count differs from the available one
+    """
+    param_shapes = zero_model_states[0].param_shapes
+    # Reconstruction protocol:
+    #
+    # 1. for every param group, concatenate that group's partition from each rank into one flat vector
+    # 2. walk the group's ``name -> shape`` entries in order, taking the next ``numel`` elements of the
+    #    flat vector for each parameter and viewing them with the parameter's shape
+    # 3. round both the consumed offset and the vector length up to a multiple of ``2 * world_size``
+    #    and require them to match
+    if debug:
+        for i in range(world_size):
+            for j in range(len(fp32_flat_groups[0])):
+                print(f"{FP32_FLAT_GROUPS}[{i}][{j}].shape={fp32_flat_groups[i][j].shape}")
+    # XXX: memory usage doubles here (zero2)
+    num_param_groups = len(fp32_flat_groups[0])
+    merged_single_partition_of_fp32_groups = []
+    for i in range(num_param_groups):
+        # group i's partition from every rank, in rank order
+        merged_partitions = [sd[i] for sd in fp32_flat_groups]
+        full_single_fp32_vector = torch.cat(merged_partitions, 0)
+        merged_single_partition_of_fp32_groups.append(full_single_fp32_vector)
+    # total across all groups; only reported in the debug output below (it is reassigned per group later)
+    avail_numel = sum(
+        [full_single_fp32_vector.numel() for full_single_fp32_vector in merged_single_partition_of_fp32_groups])
+    if debug:
+        wanted_params = sum([len(shapes) for shapes in param_shapes])
+        wanted_numel = sum([sum(shape.numel() for shape in shapes.values()) for shapes in param_shapes])
+        # not asserting if there is a mismatch due to possible padding
+        print(f"Have {avail_numel} numels to process.")
+        print(f"Need {wanted_numel} numels in {wanted_params} params.")
+    # params
+    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
+    # out-of-core computing solution
+    total_numel = 0
+    total_params = 0
+    for shapes, full_single_fp32_vector in zip(param_shapes, merged_single_partition_of_fp32_groups):
+        # position of the next unread element inside this group's flat vector
+        offset = 0
+        avail_numel = full_single_fp32_vector.numel()
+        for name, shape in shapes.items():
+            # shapes may be objects exposing ``numel()`` or plain sequences of dimensions
+            unpartitioned_numel = shape.numel() if _has_callable(shape, 'numel') else math.prod(shape)
+            total_numel += unpartitioned_numel
+            total_params += 1
+            if debug:
+                print(f"{name} full shape: {shape} unpartitioned numel {unpartitioned_numel} ")
+            state_dict[name] = full_single_fp32_vector.narrow(0, offset, unpartitioned_numel).view(shape)
+            offset += unpartitioned_numel
+        # Z2 started to align to 2*world_size to improve nccl performance. Therefore both offset and
+        # avail_numel can differ by anywhere between 0..2*world_size. Due to two unrelated complex
+        # paddings performed in the code it's almost impossible to predict the exact numbers w/o the
+        # live optimizer object, so we are checking that the numbers are within the right range
+        align_to = 2 * world_size
+        def zero2_align(x):
+            # round x up to the next multiple of align_to
+            return align_to * math.ceil(x / align_to)
+        if debug:
+            print(f"original offset={offset}, avail_numel={avail_numel}")
+        offset = zero2_align(offset)
+        avail_numel = zero2_align(avail_numel)
+        if debug:
+            print(f"aligned  offset={offset}, avail_numel={avail_numel}")
+        # Sanity check
+        if offset != avail_numel:
+            raise ValueError(f"consumed {offset} numels out of {avail_numel} - something is wrong")
+    print(f"Reconstructed fp32 state dict with {total_params} params {total_numel} elements")
def _get_fp32_state_dict_from_zero2_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    """Assemble the consolidated fp32 ``state_dict`` for a ZeRO-2 checkpoint.

    The result is filled in this order: buffers of ``zero_model_states[0]``,
    frozen parameters (unless ``exclude_frozen_parameters`` is truthy), trainable
    parameters, and finally aliases for shared parameters.

    Returns:
        An ``OrderedDict`` mapping names to tensors.
    """
    rank0_state = zero_model_states[0]
    state_dict = OrderedDict()

    # buffers
    rank0_buffers = rank0_state.buffers
    state_dict.update(rank0_buffers)
    if debug:
        print(f"added {len(rank0_buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero2_merge_frozen_params(state_dict, zero_model_states)

    _zero2_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters: entry[0] becomes an alias of entry[1] when the
    # latter was reconstructed
    for entry in rank0_state.shared_params:
        alias_name, source_name = entry[0], entry[1]
        if source_name in state_dict:
            state_dict[alias_name] = state_dict[source_name]

    return state_dict
def zero3_partitioned_param_info(unpartitioned_numel, world_size):
    """Return ``(partitioned_numel, padding_numel)`` for an even split across ranks.

    Args:
        unpartitioned_numel: total number of elements of the full parameter.
        world_size: number of ranks the parameter is split across.

    Returns:
        A tuple ``(partitioned_numel, padding_numel)`` where ``partitioned_numel``
        is ``ceil(unpartitioned_numel / world_size)`` and ``padding_numel`` is the
        number of elements needed to round ``unpartitioned_numel`` up to the next
        multiple of ``world_size`` (0 when it already is a multiple).
    """
    remainder = unpartitioned_numel % world_size
    padding_numel = (world_size - remainder) if remainder else 0
    # Exact integer ceil-division. ``math.ceil(a / b)`` goes through a float and
    # silently loses precision once the element count exceeds 2**53.
    partitioned_numel = (unpartitioned_numel + padding_numel) // world_size
    return partitioned_numel, padding_numel
def _zero3_merge_frozen_params(state_dict, world_size, zero_model_states):
    """Rebuild frozen parameters of a ZeRO-3 checkpoint into ``state_dict``.

    For every name in ``zero_model_states[0].frozen_param_shapes`` the fragments
    stored by each model state are concatenated, cut back to the unpartitioned
    element count and reshaped to the recorded shape. Nothing happens when the
    frozen shape mapping is ``None`` or empty.

    Args:
        state_dict: mapping updated in place with ``name -> tensor`` entries.
        world_size: number of ranks; used for debug output and partition info.
        zero_model_states: per-rank model states exposing ``frozen_param_shapes``
            and ``frozen_param_fragments``.
    """
    frozen_shapes = zero_model_states[0].frozen_param_shapes
    if frozen_shapes is None or len(frozen_shapes) == 0:
        return

    if debug:
        for rank in range(world_size):
            rank_fragments = zero_model_states[rank].frozen_param_fragments.values()
            num_elem = sum(fragment.numel() for fragment in rank_fragments)
            print(f'rank {rank}: {FROZEN_PARAM_SHAPES}.numel = {num_elem}')

        wanted_params = len(frozen_shapes)
        wanted_numel = sum(shape.numel() for shape in frozen_shapes.values())
        rank0_numel = sum(fragment.numel() for fragment in zero_model_states[0].frozen_param_fragments.values())
        avail_numel = rank0_numel * world_size
        print(f'Frozen params: Have {avail_numel} numels to process.')
        print(f'Frozen params: Need {wanted_numel} numels in {wanted_params} params')

    total_params = 0
    total_numel = 0
    for total_params, (name, shape) in enumerate(frozen_shapes.items(), start=1):
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel

        # Stitch the per-rank fragments together, then drop trailing padding.
        fragments = tuple(model_state.frozen_param_fragments[name] for model_state in zero_model_states)
        stitched = torch.cat(fragments, 0)
        state_dict[name] = stitched.narrow(0, 0, unpartitioned_numel).view(shape)

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Frozen params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

    print(f"Reconstructed Frozen fp32 state dict with {total_params} params {total_numel} elements")
def _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states):
    """Rebuild trainable parameters of a ZeRO-3 checkpoint into ``state_dict``.

    Reconstruction protocol: the per-rank flat tensors are zipped together at the
    boundary of each param. For every param the same ``[offset, offset +
    partitioned_numel)`` window is taken from each rank, the windows are
    concatenated, trailing padding is cut off and the result is reshaped.

    Args:
        state_dict: mapping updated in place with ``name -> tensor`` entries.
        world_size: number of ranks.
        fp32_flat_groups: indexed by rank; each entry is that rank's flat tensor.
        zero_model_states: only ``zero_model_states[0].param_shapes`` is read
            (an iterable of ``name -> shape`` mappings).

    Raises:
        ValueError: if the consumed element count does not equal
            ``fp32_flat_groups[0].numel() * world_size``.
    """
    avail_numel = fp32_flat_groups[0].numel() * world_size

    # merge list of dicts, preserving order
    merged_shapes = {}
    for group_shapes in zero_model_states[0].param_shapes:
        merged_shapes.update(group_shapes.items())

    if debug:
        for rank in range(world_size):
            print(f"{FP32_FLAT_GROUPS}[{rank}].shape={fp32_flat_groups[rank].shape}")

        wanted_params = len(merged_shapes)
        wanted_numel = sum(shape.numel() for shape in merged_shapes.values())
        # not asserting if there is a mismatch due to possible padding
        print(f"Trainable params: Have {avail_numel} numels to process.")
        print(f"Trainable params: Need {wanted_numel} numels in {wanted_params} params.")

    # params
    # XXX: for huge models that can't fit into the host's RAM we will have to recode this to support
    # out-of-core computing solution
    cursor = 0
    total_numel = 0
    total_params = 0
    for name, shape in tqdm(merged_shapes.items(), desc='Gathering Sharded Weights'):
        unpartitioned_numel = shape.numel()
        total_numel += unpartitioned_numel
        total_params += 1

        partitioned_numel, partitioned_padding_numel = zero3_partitioned_param_info(unpartitioned_numel, world_size)

        if debug:
            print(
                f"Trainable params: {total_params} {name} full shape: {shape} partition0 numel={partitioned_numel} partitioned_padding_numel={partitioned_padding_numel}"
            )

        # XXX: memory usage doubles here
        rank_windows = tuple(
            fp32_flat_groups[rank].narrow(0, cursor, partitioned_numel) for rank in range(world_size))
        state_dict[name] = torch.cat(rank_windows, 0).narrow(0, 0, unpartitioned_numel).view(shape)
        cursor += partitioned_numel

    # every rank contributed ``cursor`` elements
    consumed = cursor * world_size

    # Sanity check
    if consumed != avail_numel:
        raise ValueError(f"consumed {consumed} numels out of {avail_numel} - something is wrong")

    print(f"Reconstructed Trainable fp32 state dict with {total_params} params {total_numel} elements")
def _get_fp32_state_dict_from_zero3_checkpoint(world_size, fp32_flat_groups, zero_model_states,
                                               exclude_frozen_parameters):
    """Assemble the consolidated fp32 ``state_dict`` for a ZeRO-3 checkpoint.

    The result is filled in this order: buffers of ``zero_model_states[0]``,
    frozen parameters (unless ``exclude_frozen_parameters`` is truthy), trainable
    parameters, and finally aliases for shared parameters.

    Returns:
        An ``OrderedDict`` mapping names to tensors.
    """
    rank0_state = zero_model_states[0]
    state_dict = OrderedDict()

    # buffers
    rank0_buffers = rank0_state.buffers
    state_dict.update(rank0_buffers)
    if debug:
        print(f"added {len(rank0_buffers)} buffers")

    if not exclude_frozen_parameters:
        _zero3_merge_frozen_params(state_dict, world_size, zero_model_states)

    _zero3_merge_trainable_params(state_dict, world_size, fp32_flat_groups, zero_model_states)

    # recover shared parameters: entry[0] becomes an alias of entry[1] when the
    # latter was reconstructed
    for entry in rank0_state.shared_params:
        alias_name, source_name = entry[0], entry[1]
        if source_name in state_dict:
            state_dict[alias_name] = state_dict[source_name]

    return state_dict
def get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag=None, exclude_frozen_parameters=False):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated state_dict that can be loaded with
    ``load_state_dict()`` and used for training without DeepSpeed or shared with others, for example
    via a model hub.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. When omitted, the tag is
          read (and whitespace-stripped) from the file named ``latest`` inside ``checkpoint_dir``,
          e.g., ``global_step14``
        - ``exclude_frozen_parameters``: exclude frozen parameters

    Returns:
        - pytorch ``state_dict``

    Raises:
        - ``ValueError``: no ``tag`` was given and ``checkpoint_dir`` has no ``latest`` file
        - ``FileNotFoundError``: ``checkpoint_dir/tag`` is not an existing directory

    Note: this approach may not work if your application doesn't have sufficient free CPU memory and
    you may need to use the offline approach using the ``zero_to_fp32.py`` script that is saved with
    the checkpoint.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import get_fp32_state_dict_from_zero_checkpoint
        # do the training and checkpoint saving
        state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir) # already on cpu
        model = model.cpu() # move to cpu
        model.load_state_dict(state_dict)
        # submit to model hub or save the model to share with others

    In this example the ``model`` will no longer be usable in the deepspeed context of the same
    application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.

    If you want it all done for you, use ``load_state_dict_from_zero_checkpoint`` instead.
    """
    if tag is None:
        # Fall back to the tag recorded in the checkpoint's 'latest' marker file.
        latest_path = os.path.join(checkpoint_dir, 'latest')
        if not os.path.isfile(latest_path):
            raise ValueError(f"Unable to find 'latest' file at {latest_path}")
        with open(latest_path, 'r') as fd:
            tag = fd.read().strip()

    ds_checkpoint_dir = os.path.join(checkpoint_dir, tag)

    if os.path.isdir(ds_checkpoint_dir):
        return _get_fp32_state_dict_from_zero_checkpoint(ds_checkpoint_dir, exclude_frozen_parameters)

    raise FileNotFoundError(f"Directory '{ds_checkpoint_dir}' doesn't exist")
def convert_zero_checkpoint_to_fp32_state_dict(checkpoint_dir,
                                               output_dir,
                                               max_shard_size="5GB",
                                               safe_serialization=False,
                                               tag=None,
                                               exclude_frozen_parameters=False):
    """
    Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict`` file that can be
    loaded with ``torch.load(file)`` + ``load_state_dict()`` and used for training without DeepSpeed.

    Args:
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``output_dir``: directory to the pytorch fp32 state_dict output files; it is created if it does not exist yet
        - ``max_shard_size``: the maximum size for a checkpoint before being sharded, default value is 5GB. ``None`` disables sharding
        - ``safe_serialization``:  whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``
        - ``exclude_frozen_parameters``: exclude frozen parameters

    Raises:
        - ``ImportError``: ``safetensors`` (for ``safe_serialization``) or ``huggingface_hub``
          (for ``max_shard_size``) is required but not installed
    """
    # Dependency pre-check: fail before the expensive conversion, not after it.
    if safe_serialization:
        try:
            from safetensors.torch import save_file
        except ImportError:
            print('If you want to use `safe_serialization`, please `pip install safetensors`')
            raise
    if max_shard_size is not None:
        try:
            from huggingface_hub import split_torch_state_dict_into_shards
        except ImportError:
            print('If you want to use `max_shard_size`, please `pip install huggingface_hub`')
            raise

    # Convert zero checkpoint to state_dict
    state_dict = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag, exclude_frozen_parameters)

    # Shard the model if it is too big.
    weights_name = "model.safetensors" if safe_serialization else "pytorch_model.bin"
    if max_shard_size is not None:
        filename_pattern = weights_name.replace(".bin", "{suffix}.bin").replace(".safetensors", "{suffix}.safetensors")
        state_dict_split = split_torch_state_dict_into_shards(state_dict,
                                                              filename_pattern=filename_pattern,
                                                              max_shard_size=max_shard_size)
    else:
        # No sharding requested: emulate the split result with a single file
        # that holds every tensor.
        from collections import namedtuple
        StateDictSplit = namedtuple("StateDictSplit", ["is_sharded", "filename_to_tensors"])
        state_dict_split = StateDictSplit(is_sharded=False,
                                          filename_to_tensors={weights_name: list(state_dict.keys())})

    # Make sure the destination exists; otherwise the first save below would fail
    # after all the conversion work is already done. An empty ``output_dir``
    # means the current directory and needs no creation.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Save the model
    filename_to_tensors = state_dict_split.filename_to_tensors.items()
    for shard_file, tensors in tqdm(filename_to_tensors, desc="Saving checkpoint shards"):
        shard = {tensor: state_dict[tensor].contiguous() for tensor in tensors}
        output_path = os.path.join(output_dir, shard_file)
        if safe_serialization:
            save_file(shard, output_path, metadata={"format": "pt"})
        else:
            torch.save(shard, output_path)

    # Save index if sharded
    if state_dict_split.is_sharded:
        index = {
            "metadata": state_dict_split.metadata,
            "weight_map": state_dict_split.tensor_to_filename,
        }
        save_index_file = "model.safetensors.index.json" if safe_serialization else "pytorch_model.bin.index.json"
        save_index_file = os.path.join(output_dir, save_index_file)
        with open(save_index_file, "w", encoding="utf-8") as f:
            content = json.dumps(index, indent=2, sort_keys=True) + "\n"
            f.write(content)
def load_state_dict_from_zero_checkpoint(model, checkpoint_dir, tag=None):
    """
    1. Convert ZeRO 2 or 3 checkpoint into a single fp32 consolidated ``state_dict``
    2. Put the provided model to cpu
    3. Load the ``state_dict`` into the provided model (non-strict)

    Args:
        - ``model``: the model object to update
        - ``checkpoint_dir``: path to the desired checkpoint folder. (one that contains the tag-folder, like ``global_step14``)
        - ``tag``: checkpoint tag used as a unique identifier for checkpoint. If not provided will attempt to load tag in the file named ``latest`` in the checkpoint folder, e.g., ``global_step14``

    Returns:
        - ``model``: modified model

    Make sure you have plenty of CPU memory available before you call this function. If you don't
    have enough use the ``zero_to_fp32.py`` utility to do the conversion. You will find it
    conveniently placed for you in the checkpoint folder.

    A typical usage might be ::

        from deepspeed.utils.zero_to_fp32 import load_state_dict_from_zero_checkpoint
        model = load_state_dict_from_zero_checkpoint(trainer.model, checkpoint_dir)
        # submit to model hub or save the model to share with others

    Note, that once this was run, the ``model`` will no longer be usable in the deepspeed context
    of the same application. i.e. you will need to re-initialize the deepspeed engine, since
    ``model.load_state_dict(state_dict)`` will remove all the deepspeed magic from it.
    """
    logger.info("Extracting fp32 weights")
    fp32_weights = get_fp32_state_dict_from_zero_checkpoint(checkpoint_dir, tag)

    logger.info("Overwriting model with fp32 weights")
    cpu_model = model.cpu()
    # strict=False: key mismatches between model and checkpoint are tolerated
    cpu_model.load_state_dict(fp32_weights, strict=False)

    return cpu_model
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, publish the debug flag as the
    # module-level ``debug`` global read by the merge helpers, then run the
    # conversion.
    parser = argparse.ArgumentParser()
    parser.add_argument("checkpoint_dir",
                        type=str,
                        help="path to the desired checkpoint folder, e.g., path/checkpoint-12")
    # NOTE: adjacent string literals are concatenated verbatim, so every fragment
    # below carries its own separating space.
    parser.add_argument("output_dir",
                        type=str,
                        help="directory to the pytorch fp32 state_dict output files "
                        "(e.g. path/checkpoint-12-output/)")
    parser.add_argument(
        "--max_shard_size",
        type=str,
        default="5GB",
        help="The maximum size for a checkpoint before being sharded. Checkpoints shard will then be each of size "
        "lower than this size. If expressed as a string, needs to be digits followed by a unit (like `5MB`). "
        "We default it to 5GB in order for models to be able to run easily on free-tier google colab instances "
        "without CPU OOM issues.")
    parser.add_argument(
        "--safe_serialization",
        default=False,
        action='store_true',
        help="Whether to save the model using `safetensors` or the traditional PyTorch way (that uses `pickle`).")
    parser.add_argument("-t",
                        "--tag",
                        type=str,
                        default=None,
                        help="checkpoint tag used as a unique identifier for checkpoint. e.g., global_step1")
    parser.add_argument("--exclude_frozen_parameters", action='store_true', help="exclude frozen parameters")
    parser.add_argument("-d", "--debug", action='store_true', help="enable debug")
    args = parser.parse_args()

    debug = args.debug

    convert_zero_checkpoint_to_fp32_state_dict(args.checkpoint_dir,
                                               args.output_dir,
                                               max_shard_size=args.max_shard_size,
                                               safe_serialization=args.safe_serialization,
                                               tag=args.tag,
                                               exclude_frozen_parameters=args.exclude_frozen_parameters)