Buckets:
| # One-shot bootstrap for fresh RunPod / Lambda Labs / similar Linux GPU pod. | |
| # | |
| # Usage on the pod: | |
| # curl -sSL https://raw.githubusercontent.com/bochen2029-pixel/katherine-k0-finetune/master/bootstrap-runpod.sh | bash | |
| # | |
| # Or after manual clone: | |
| # cd katherine-k0-finetune && ./bootstrap-runpod.sh | |
| # | |
| # Sets up: clone repo, install Python deps (unsloth + trl + transformers + hf), | |
| # verify CUDA, optional HF auth, leave you ready to run ./run-cloud-runpod.sh | |
| set -euo pipefail | |
| REPO_URL="${REPO_URL:-https://github.com/bochen2029-pixel/katherine-k0-finetune.git}" | |
| REPO_DIR="${REPO_DIR:-$HOME/katherine-k0-finetune}" | |
| echo "============================================================" | |
| echo "Katherine k0 fine-tune — bootstrap" | |
| echo "============================================================" | |
| echo "Date: $(date -u +%Y-%m-%dT%H:%M:%SZ)" | |
| echo "Host: $(hostname)" | |
| echo "User: $(whoami)" | |
| echo | |
| # 1. CUDA toolkit check | |
| echo "[1/6] Verifying CUDA toolkit..." | |
| if ! command -v nvcc >/dev/null; then | |
| echo " nvcc not in PATH; trying /usr/local/cuda/bin" | |
| if [ -d /usr/local/cuda/bin ]; then | |
| export PATH=/usr/local/cuda/bin:$PATH | |
| fi | |
| fi | |
| if command -v nvcc >/dev/null; then | |
| nvcc --version | grep release | |
| else | |
| echo " WARN: nvcc not found. Unsloth doesn't strictly need it, but llama.cpp" | |
| echo " compilation for GGUF export may. Continue at your own risk." | |
| fi | |
| echo | |
| echo "[2/6] Detecting GPUs..." | |
| nvidia-smi --query-gpu=name,memory.total,driver_version --format=csv | |
| # 3. Clone repo | |
| echo | |
| echo "[3/6] Cloning repo..." | |
| if [ -d "$REPO_DIR/.git" ]; then | |
| echo " repo already at $REPO_DIR; pulling latest" | |
| cd "$REPO_DIR" | |
| git pull --ff-only | |
| else | |
| git clone "$REPO_URL" "$REPO_DIR" | |
| cd "$REPO_DIR" | |
| fi | |
| echo "[3/6] In: $(pwd)" | |
| # 4. Install Python deps | |
| echo | |
| echo "[4/6] Installing Python dependencies..." | |
| echo " (this can take 5-10 min on first run — unsloth pulls a lot)" | |
| # Use the system Python (RunPod's pytorch image has Python 3.11 with pip) | |
| PY=python3 | |
| if ! command -v $PY >/dev/null; then PY=python; fi | |
| $PY -m pip install --quiet --upgrade pip | |
| # Core stack. Pin major versions to avoid surprises. | |
| # Unsloth latest is generally safe; transformers/peft/trl pinned compatible. | |
| $PY -m pip install --quiet \ | |
| "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" \ | |
| "transformers>=4.50.0" \ | |
| "trl>=0.12.0" \ | |
| "peft>=0.12.0" \ | |
| "bitsandbytes>=0.43.0" \ | |
| "accelerate>=1.0.0" \ | |
| "datasets>=2.20.0" \ | |
| "huggingface_hub>=0.27.0" \ | |
| "sentencepiece" \ | |
| "protobuf" \ | |
| "xformers" | |
| # Verify import | |
| $PY -c "import unsloth; print(f' unsloth: {unsloth.__version__}')" | |
| $PY -c "import transformers; print(f' transformers: {transformers.__version__}')" | |
| $PY -c "import trl; print(f' trl: {trl.__version__}')" | |
| $PY -c "import peft; print(f' peft: {peft.__version__}')" | |
| echo | |
| echo "[5/6] HF CLI + auth..." | |
| if command -v hf >/dev/null; then | |
| HF_VER=$(hf --version 2>&1 | head -1 || echo "unknown") | |
| echo " hf CLI: $HF_VER" | |
| else | |
| echo " WARN: hf command not in PATH after install; trying huggingface-cli fallback" | |
| fi | |
| if [ -n "${HF_TOKEN:-}" ]; then | |
| if hf auth login --token "$HF_TOKEN" >/dev/null 2>&1; then | |
| echo " HF logged in as $(hf auth whoami 2>&1 | grep user: | awk '{print $2}')" | |
| else | |
| echo " WARN: HF login failed; HF sync will skip at run-time" | |
| fi | |
| elif [ -f "$HOME/.hf_token" ]; then | |
| HF_TOKEN=$(cat "$HOME/.hf_token") | |
| export HF_TOKEN | |
| hf auth login --token "$HF_TOKEN" >/dev/null 2>&1 || true | |
| echo " HF token loaded from \$HOME/.hf_token" | |
| else | |
| echo " HF_TOKEN not set; HF sync will be disabled at run-time" | |
| echo " To enable: export HF_TOKEN=<your_token> before running ./run-cloud-runpod.sh" | |
| fi | |
| # 6. Verify dataset | |
| echo | |
| echo "[6/6] Verifying canonical datasets..." | |
| chmod +x run-cloud-runpod.sh _supervise-cloud.sh bootstrap-runpod.sh 2>/dev/null || true | |
| if [ -f data/k0_canonical.jsonl ]; then | |
| SFT_LINES=$(wc -l < data/k0_canonical.jsonl) | |
| echo " ✓ data/k0_canonical.jsonl ($SFT_LINES SFT examples)" | |
| else | |
| echo " WARN: data/k0_canonical.jsonl missing; rebuild with prep_dataset.py if you have raw sources" | |
| fi | |
| if [ -f data/k0_dpo_curated.jsonl ]; then | |
| DPO_LINES=$(wc -l < data/k0_dpo_curated.jsonl) | |
| echo " ✓ data/k0_dpo_curated.jsonl ($DPO_LINES DPO pairs)" | |
| else | |
| echo " (no DPO data; DPO stage will skip)" | |
| fi | |
| echo | |
| echo "============================================================" | |
| echo "Bootstrap complete." | |
| echo | |
| echo "To launch the full pipeline:" | |
| echo " cd $REPO_DIR" | |
| echo " export HF_TOKEN=<your_token> # if not already set" | |
| echo " ./run-cloud-runpod.sh" | |
| echo | |
| echo "Stages: SFT → DPO → merge+GGUF (3 quants) → push to HF bucket" | |
| echo "Total wallclock: ~50-70 min on H200, ~75-90 min on H100" | |
| echo "============================================================" | |
Xet Storage Details
- Size:
- 4.88 kB
- Xet hash:
- 37c831ea3fd265386806b406ce7b0d7098cbd38d1cf8314c9afc4e5aedd96470
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.