#!/bin/bash
# Preflight check: validates the training environment without GPUs or data.
#
# Run inside the Docker container:
#   docker run --rm pi05-training ./preflight.sh
#
# Environment:
#   HF_TOKEN  Optional. When set and non-empty, the HuggingFace auth and
#             model-access checks run; otherwise they are reported as SKIP.
#
# Exit status: 0 when no executed check failed, 1 otherwise.

# Deliberately no `set -e`: individual checks are expected to fail and are
# tallied by check() instead of aborting the run.
set -u

PASS=0
FAIL=0

#######################################
# Run a command as a named check and record whether it succeeded.
# Globals:
#   PASS, FAIL (incremented)
# Arguments:
#   $1   - human-readable check name
#   $2.. - command (and its arguments) to execute
# Outputs:
#   One " PASS <name>" or " FAIL <name>" line on stdout. On failure, the
#   last line of the command's combined stdout/stderr is echoed to stderr
#   so assertion messages are not lost.
# Returns:
#   0 always; the outcome is reported through the counters.
#######################################
check() {
  local name="$1"
  shift
  local output
  if output=$("$@" 2>&1); then
    echo " PASS $name"
    PASS=$((PASS + 1))
  else
    echo " FAIL $name"
    FAIL=$((FAIL + 1))
    if [[ -n "$output" ]]; then
      # Command substitution strips trailing newlines, so everything after
      # the final newline is the last line (for Python: the exception text).
      printf '        %s\n' "${output##*$'\n'}" >&2
    fi
  fi
  return 0
}

echo "=== Preflight Checks ==="
echo ""

echo "-- Python & Core Packages --"
check "python 3.10" python -c "import sys; assert sys.version_info[:2] == (3,10)"
check "torch imports" python -c "import torch"
check "transformers >= 4.45" python -c "import transformers; v=transformers.__version__; assert tuple(int(x) for x in v.split('.')[:2]) >= (4,45), v"
check "accelerate imports" python -c "import accelerate"
check "lerobot imports" python -c "import lerobot"
check "wandb imports" python -c "import wandb"
check "huggingface_hub" python -c "import huggingface_hub"

echo ""
echo "-- PaliGemma Config (the previous crash) --"
check "PaliGemma registered" python -c "
from transformers import AutoConfig
# This is what crashed before - CONFIG_MAPPING['paligemma'] was None
c = AutoConfig.for_model('paligemma')
assert c is not None
"

echo ""
echo "-- FFmpeg --"
check "ffmpeg available" ffmpeg -version
# The optional 'n' accepts banners built from release tags (e.g. 'n6.1').
# A banner with no leading major number fails with an explicit message
# instead of an AttributeError on a None match.
check "ffmpeg version >= 6" python -c "
import subprocess, re
out = subprocess.check_output(['ffmpeg', '-version']).decode()
m = re.search(r'ffmpeg version n?(\d+)', out)
assert m, 'unrecognized ffmpeg version banner: ' + out[:80]
ver = int(m.group(1))
assert ver >= 6, f'ffmpeg {ver} < 6'
"

echo ""
echo "-- Project Files --"
check "filtered_index.json" test -f /workspace/pi05-so100-diverse/filtered_index.json
check "norm_stats.json" test -f /workspace/pi05-so100-diverse/norm_stats.json
check "train_cloud.sh" test -f /workspace/pi05-so100-diverse/train_cloud.sh
check "so100_dataset.py" test -f /workspace/pi05-so100-diverse/so100_dataset.py

echo ""
echo "-- LeRobot Patches Applied --"
check "patched train script" python -c "
import lerobot.scripts.lerobot_train
import inspect
src = inspect.getsource(lerobot.scripts.lerobot_train)
assert 'early_stop_steps' in src, 'train patch not applied'
"
check "patched factory" python -c "
import lerobot.datasets.factory
import inspect
src = inspect.getsource(lerobot.datasets.factory)
assert 'so100:' in src, 'factory patch not applied'
"

echo ""
echo "-- Accelerate Multi-GPU Config --"
check "accelerate launch" accelerate launch --help

echo ""
echo "-- HuggingFace Auth --"
if [[ -n "${HF_TOKEN:-}" ]]; then
  # Hand the token to Python through the environment rather than splicing it
  # into the source text: a quote in the token can no longer break or inject
  # code, and the secret stays out of the process argument list.
  export HF_TOKEN
  check "HF_TOKEN valid" python -c "
import os
from huggingface_hub import HfApi
api = HfApi(token=os.environ['HF_TOKEN'])
api.whoami()
"
else
  echo " SKIP HF_TOKEN not set (set it to validate auth + Gemma license)"
fi

echo ""
echo "-- Weights Download (dry check) --"
if [[ -n "${HF_TOKEN:-}" ]]; then
  export HF_TOKEN
  check "pi05_base accessible" python -c "
import os
from huggingface_hub import HfApi
api = HfApi(token=os.environ['HF_TOKEN'])
info = api.model_info('lerobot/pi05_base')
assert info is not None
"
else
  echo " SKIP Need HF_TOKEN to check model access"
fi

echo ""
echo "================================"
echo " Results: $PASS passed, $FAIL failed"
echo "================================"

if [[ "$FAIL" -eq 0 ]]; then
  exit 0
fi
exit 1