pi05-so100-diverse / download_dataset.sh
bot
Update lerobot to latest with SO100 rename_map fix
a8eb6e5
raw
history blame contribute delete
790 Bytes
#!/bin/bash
# Download the full community_dataset_v3 using hfd (aria2c-based, resolver-only, no API rate limit issues)
#
# Required environment:
#   HF_TOKEN     Hugging Face access token; forwarded to hfd via --hf_token.
# Optional environment:
#   DATASET_DIR  Destination directory (default: /ephemeral/community_dataset_v3).
#
# Exits non-zero if HF_TOKEN is missing, if hfd cannot be installed, or if the
# download itself fails.
set -euo pipefail

readonly HFD_BIN="/usr/local/bin/hfd"
readonly HFD_URL="https://gist.githubusercontent.com/padeoe/697678ab8e528b85a2a7bddafea1fa4f/raw/hfd.sh"

# ${HF_TOKEN:-} keeps this check meaningful under `set -u` when the variable
# is not set at all (otherwise bash would abort with a generic message).
if [[ -z "${HF_TOKEN:-}" ]]; then
  echo "ERROR: export HF_TOKEN first" >&2
  exit 1
fi

DATASET_DIR="${DATASET_DIR:-/ephemeral/community_dataset_v3}"

# Install hfd if not present (or present but not executable).
# The script is fetched into a temporary file next to the final location and
# only moved into place once the download succeeded and produced a non-empty
# file. Writing straight to $HFD_BIN would leave an empty/partial file behind
# on a failed download, and every later run would then skip the install.
if [[ ! -x "$HFD_BIN" ]]; then
  hfd_tmp="$(mktemp "${HFD_BIN}.XXXXXX")" || {
    echo "ERROR: cannot create a temporary file next to $HFD_BIN" >&2
    exit 1
  }
  trap 'rm -f -- "$hfd_tmp"' EXIT

  if ! wget -q "$HFD_URL" -O "$hfd_tmp" || [[ ! -s "$hfd_tmp" ]]; then
    echo "ERROR: failed to download hfd from $HFD_URL" >&2
    exit 1
  fi

  # mktemp creates the file owner-only; set an explicit mode so the installed
  # script is readable and executable like a normally downloaded one.
  chmod 755 "$hfd_tmp"
  mv -f -- "$hfd_tmp" "$HFD_BIN"
  trap - EXIT
fi

echo "Downloading dataset to $DATASET_DIR..."
echo "Using aria2c with 4 threads per file, 5 concurrent downloads"

# Invoke hfd by the same absolute path that was checked/installed above, so
# the run does not depend on /usr/local/bin being on PATH.
# NOTE(review): the token is passed as a command-line argument and may be
# visible in process listings while the download runs.
"$HFD_BIN" HuggingFaceVLA/community_dataset_v3 \
  --dataset \
  --hf_token "$HF_TOKEN" \
  --tool aria2c \
  -x 4 \
  -j 5 \
  --local-dir "$DATASET_DIR"

echo "Download complete!"