#!/usr/bin/env bash # ============================================================================ # LUNA 100M — Full SFT on RAG/MCP data # ============================================================================ # Starts from the old sft_v1 full model checkpoint and fine-tunes all weights # on the ASTERIZER/LUNA-RAG-MCP-SFT-10M dataset. # ============================================================================ set -euo pipefail HF_TOKEN="${HF_TOKEN:?Set HF_TOKEN env var}" CODE_REPO="ASTERIZER/LUNA-Training" WORK_DIR="/workspace/luna" echo "============================================================" echo " LUNA 100M — Full SFT on RAG/MCP data" echo "============================================================" echo "[1/3] Fetching latest training code..." mkdir -p "$WORK_DIR" cd "$WORK_DIR" pip install -q huggingface_hub python3 -c " from huggingface_hub import snapshot_download snapshot_download( repo_id='${CODE_REPO}', local_dir='${WORK_DIR}', token='${HF_TOKEN}', ) print('Code downloaded.') " echo "[2/3] Installing dependencies..." pip install -q torch --index-url https://download.pytorch.org/whl/cu121 2>/dev/null || true pip install -q -r requirements.txt 2>/dev/null echo "[3/3] Starting full SFT..." nvidia-smi --query-gpu=name,memory.total --format=csv,noheader || true CUDA_VISIBLE_DEVICES=0 python3 sft_train.py \ --config rag_mcp_full_sft_config.yaml echo "============================================================" echo " Full SFT complete!" echo " Output: Base/out/sft/rag_mcp_full_sft" echo " To upload it to Hugging Face, run:" echo " python3 upload_full_sft_to_hf.py --repo-id ASTERIZER/LUNA-100M --folder Base/out/sft/rag_mcp_full_sft --path-in-repo rag_mcp_full_sft" if [ "${UPLOAD_TO_HF:-0}" = "1" ]; then echo " UPLOAD_TO_HF=1 detected. Uploading full SFT folder to Hugging Face..." python3 upload_full_sft_to_hf.py \ --repo-id ASTERIZER/LUNA-100M \ --folder Base/out/sft/rag_mcp_full_sft \ --path-in-repo rag_mcp_full_sft fi echo "============================================================"