"""
Minimal example script for converting a dataset to LeRobot format.

We use the Libero dataset (stored in RLDS) for this example, but it can be easily
modified for any other data you have saved in a custom format.

Usage:
uv run examples/libero/convert_libero_data_to_lerobot.py --data_dir /path/to/your/data

If you want to push your dataset to the Hugging Face Hub, you can use the following command:
uv run examples/libero/convert_libero_data_to_lerobot.py --data_dir /path/to/your/data --push_to_hub

Note: to run the script, you need to install tensorflow_datasets:
`uv pip install tensorflow tensorflow_datasets`

You can download the raw Libero datasets from https://huggingface.co/datasets/openvla/modified_libero_rlds
The resulting dataset will get saved to the $HF_LEROBOT_HOME directory.
Running this conversion script will take approximately 30 minutes.
"""
|
|
| import shutil |
|
|
| from lerobot.common.datasets.lerobot_dataset import HF_LEROBOT_HOME |
| from lerobot.common.datasets.lerobot_dataset import LeRobotDataset |
| import tensorflow_datasets as tfds |
| import tyro |
|
|
# Repo id of the output dataset. It is passed to LeRobotDataset.create and is
# also the sub-path under HF_LEROBOT_HOME that main() clears before converting.
# The "your_hf_username" part is a placeholder to replace with your own account.
REPO_NAME: str = "your_hf_username/libero"

# Names of the raw datasets that main() loads (via tfds.load) and converts,
# in this order, into the single output dataset.
RAW_DATASET_NAMES: list[str] = [
    "libero_10_no_noops",
    "libero_goal_no_noops",
    "libero_object_no_noops",
    "libero_spatial_no_noops",
]
|
|
|
|
def main(data_dir: str, *, push_to_hub: bool = False):
    """Convert the raw Libero RLDS datasets into one LeRobot dataset.

    Any existing directory at ``HF_LEROBOT_HOME / REPO_NAME`` is deleted
    first. Every dataset named in ``RAW_DATASET_NAMES`` is then loaded from
    ``data_dir`` and written, episode by episode, into a newly created
    LeRobot dataset with repo id ``REPO_NAME``.

    Args:
        data_dir: Directory handed to ``tfds.load`` as the location of the
            raw datasets.
        push_to_hub: If true, upload the converted dataset to the Hugging
            Face Hub after all episodes have been written.
    """

    def image_feature():
        # Build a fresh dict on every call so the two camera entries do not
        # share any mutable object.
        return {
            "dtype": "image",
            "shape": (256, 256, 3),
            "names": ["height", "width", "channel"],
        }

    def vector_feature(size, label):
        # 1-D float32 feature of the given length with a single axis name.
        return {
            "dtype": "float32",
            "shape": (size,),
            "names": [label],
        }

    # Start from a clean slate: drop whatever a previous run left behind.
    target_dir = HF_LEROBOT_HOME / REPO_NAME
    if target_dir.exists():
        shutil.rmtree(target_dir)

    # Declare the per-frame features stored in the LeRobot dataset.
    feature_spec = {
        "image": image_feature(),
        "wrist_image": image_feature(),
        "state": vector_feature(8, "state"),
        "actions": vector_feature(7, "actions"),
    }
    dataset = LeRobotDataset.create(
        repo_id=REPO_NAME,
        robot_type="panda",
        fps=10,
        features=feature_spec,
        image_writer_threads=10,
        image_writer_processes=5,
    )

    # Walk every raw dataset and copy its episodes frame by frame.
    # Adapt this loop when converting data stored in a different format.
    for dataset_name in RAW_DATASET_NAMES:
        episodes = tfds.load(dataset_name, data_dir=data_dir, split="train")
        for episode in episodes:
            for step in episode["steps"].as_numpy_iterator():
                observation = step["observation"]
                frame = {
                    "image": observation["image"],
                    "wrist_image": observation["wrist_image"],
                    "state": observation["state"],
                    "actions": step["action"],
                    # The instruction arrives as bytes; store it as text.
                    "task": step["language_instruction"].decode(),
                }
                dataset.add_frame(frame)
            # Close the episode once all of its steps have been added.
            dataset.save_episode()

    # Uploading is opt-in; without the flag the dataset stays local.
    if not push_to_hub:
        return

    dataset.push_to_hub(
        tags=["libero", "panda", "rlds"],
        private=False,
        push_videos=True,
        license="apache-2.0",
    )
|
|
|
|
# Script entry point: tyro builds the command-line interface from main()'s
# signature (see the usage examples in the module docstring).
if __name__ == "__main__":
    tyro.cli(main)
|
|