LUNA-Training / gpu_full_sft.sh
ASTERIZER's picture
Upload gpu_full_sft.sh with huggingface_hub
1e4733c verified
#!/usr/bin/env bash
# ============================================================================
# LUNA 100M — Full SFT on RAG/MCP data
# ============================================================================
# Starts from the old sft_v1 full model checkpoint and fine-tunes all weights
# on the ASTERIZER/LUNA-RAG-MCP-SFT-10M dataset.
# ============================================================================
# Strict mode: stop on the first failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# Required: Hugging Face access token. The ':?' expansion aborts the script
# with the given message when HF_TOKEN is unset or empty.
HF_TOKEN="${HF_TOKEN:?Set HF_TOKEN env var}"
# Hub repository that holds the training code, and the local directory it is
# downloaded into (this also becomes the working directory for later steps).
CODE_REPO="ASTERIZER/LUNA-Training"
WORK_DIR="/workspace/luna"

echo "============================================================"
echo " LUNA 100M — Full SFT on RAG/MCP data"
echo "============================================================"

echo "[1/3] Fetching latest training code..."
mkdir -p "$WORK_DIR"
cd "$WORK_DIR"
pip install -q huggingface_hub

# The settings are handed to Python through the child's environment and the
# program is a quoted heredoc, so the shell substitutes nothing into it.
# Rationale: a token spliced into a `python3 -c "..."` string becomes part of
# the process's command line (readable through `ps`), and any quote or
# backslash in a value would change the Python source itself.
HF_TOKEN="$HF_TOKEN" \
LUNA_CODE_REPO="$CODE_REPO" \
LUNA_WORK_DIR="$WORK_DIR" \
python3 - <<'PY'
import os

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["LUNA_CODE_REPO"],
    local_dir=os.environ["LUNA_WORK_DIR"],
    token=os.environ["HF_TOKEN"],
)
print("Code downloaded.")
PY
echo "[2/3] Installing dependencies..."

# The CUDA 12.1 torch wheel install stays best-effort, as before: a failure
# here does not stop the script (presumably because the image may already
# provide torch -- TODO confirm). pip's own stderr is no longer discarded and
# a warning is printed, so the failure is visible instead of silent.
if ! pip install -q torch --index-url https://download.pytorch.org/whl/cu121; then
  echo "WARNING: torch (cu121) install failed; continuing with whatever torch is already available." >&2
fi

# The project requirements are mandatory. pip's stderr is kept so the cause
# of a failure is shown, and the script stops with an explicit message.
if ! pip install -q -r requirements.txt; then
  echo "ERROR: installing requirements.txt failed." >&2
  exit 1
fi
printf '%s\n' "[3/3] Starting full SFT..."

# Print the name and total memory of each GPU. Purely informational: a
# failing or missing nvidia-smi must not stop the run.
nvidia-smi --query-gpu=name,memory.total --format=csv,noheader || :

# Run the trainer with only GPU 0 visible to it, using the full-SFT config.
train_cmd=(python3 sft_train.py --config rag_mcp_full_sft_config.yaml)
CUDA_VISIBLE_DEVICES=0 "${train_cmd[@]}"
# Closing summary. The output folder and upload target are named once so the
# printed hint and the command that is actually executed use the same values.
rule="============================================================"
hf_repo="ASTERIZER/LUNA-100M"
sft_dir="Base/out/sft/rag_mcp_full_sft"
repo_subdir="rag_mcp_full_sft"

printf '%s\n' \
  "$rule" \
  " Full SFT complete!" \
  " Output: $sft_dir" \
  " To upload it to Hugging Face, run:" \
  " python3 upload_full_sft_to_hf.py --repo-id $hf_repo --folder $sft_dir --path-in-repo $repo_subdir"

# Opt-in upload: runs only when UPLOAD_TO_HF is exactly "1"; an unset
# variable is treated as "0".
case "${UPLOAD_TO_HF:-0}" in
  1)
    printf '%s\n' " UPLOAD_TO_HF=1 detected. Uploading full SFT folder to Hugging Face..."
    python3 upload_full_sft_to_hf.py \
      --repo-id "$hf_repo" \
      --folder "$sft_dir" \
      --path-in-repo "$repo_subdir"
    ;;
esac

printf '%s\n' "$rule"