ASTERIZER committed on
Commit
88d5091
·
verified ·
1 Parent(s): af450cd

Upload setup_and_sft.sh with huggingface_hub

Browse files
Files changed (1) hide show
  1. setup_and_sft.sh +112 -0
setup_and_sft.sh ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# =============================================================================
# LUNA 100M — SFT Cloud Setup & Train Entrypoint
# Runs on RunPod, Vast.ai, Lambda Labs, or any Linux GPU pod.
#
# USAGE:
#   bash setup_and_sft.sh [huggingface|local] [HF_REPO_OR_PATH]
#
# ARGUMENTS:
#   $1  Source mode: "huggingface" (default) or "local".
#   $2  Hugging Face Space repo id in huggingface mode (default: ASTERIZER/LUNA),
#       or the path of an existing checkout in local mode.
#
# ENVIRONMENT:
#   HF_TOKEN  Optional Hugging Face token; read from the environment by the
#             snapshot_download fallback when `git clone` fails.
#
# EXAMPLES:
#   # From HuggingFace (recommended):
#   bash setup_and_sft.sh huggingface ASTERIZER/LUNA
#
#   # Already cloned locally:
#   bash setup_and_sft.sh local /workspace/LUNA
# =============================================================================

set -euo pipefail

# Print an error message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

DATA_SOURCE="${1:-huggingface}"
REPO_ID="${2:-ASTERIZER/LUNA}"

# Reject unknown modes up front instead of silently falling through to the
# default working directory.
case "$DATA_SOURCE" in
  huggingface | local) ;;
  *)
    printf 'Usage: bash %s [huggingface|local] [HF_REPO_OR_PATH]\n' "${0##*/}" >&2
    die "unknown source '$DATA_SOURCE' (expected 'huggingface' or 'local')"
    ;;
esac

echo "=========================================="
echo " LUNA 100M — SFT Setup"
echo " Source: $DATA_SOURCE"
echo " Repo : $REPO_ID"
echo "=========================================="

# ── 1. Python packages ────────────────────────────────────────────────────────
echo ""
echo "[1/4] Installing dependencies..."

pip install -q --upgrade pip
# Best-effort: pods often ship with these preinstalled, so a failed install is
# not fatal here. The failure is reported rather than hidden; a genuinely
# missing package makes the later Python steps fail.
if ! pip install -q \
  torch torchvision \
  psutil \
  pyyaml \
  transformers \
  huggingface_hub \
  datasets; then
  echo " WARNING: dependency installation failed; continuing with the existing environment." >&2
fi

echo " Done."

# ── 2. Clone repo / fetch data ────────────────────────────────────────────────
echo ""
echo "[2/4] Fetching repository..."

WORK_DIR="/workspace/LUNA"

if [[ "$DATA_SOURCE" == "huggingface" ]]; then
  if [[ ! -d "$WORK_DIR" ]]; then
    # Clone the HF Space as a git repo; if that fails (no git, auth, network),
    # fall back to huggingface_hub. Values are handed to Python through the
    # environment so they are never interpolated into Python source code.
    if ! git clone "https://huggingface.co/spaces/$REPO_ID" "$WORK_DIR"; then
      echo " git clone failed, falling back to snapshot_download..." >&2
      LUNA_REPO_ID="$REPO_ID" LUNA_WORK_DIR="$WORK_DIR" python - <<'PY'
import os

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["LUNA_REPO_ID"],
    repo_type="space",
    local_dir=os.environ["LUNA_WORK_DIR"],
    # An empty or unset HF_TOKEN means "no explicit token".
    token=os.environ.get("HF_TOKEN") or None,
)
PY
    fi
  else
    echo " $WORK_DIR already exists, pulling latest..."
    if [[ -e "$WORK_DIR/.git" ]]; then
      # A failed pull is not fatal: keep training on the existing checkout.
      git -C "$WORK_DIR" pull ||
        echo " (git pull failed, using existing checkout)" >&2
    else
      echo " (not a git repo, using existing)"
    fi
  fi
else
  # local mode: the second argument is the path of an existing checkout.
  WORK_DIR="$REPO_ID"
fi

[[ -d "$WORK_DIR" ]] || die "working directory not found: $WORK_DIR"
cd "$WORK_DIR"
echo " Working dir: $(pwd)"

# ── 3. System probe ──────────────────────────────────────────────────────────
echo ""
echo "[3/4] System probe..."
python - <<'PY'
import os

import psutil
import torch

if torch.cuda.is_available():
    props = torch.cuda.get_device_properties(0)
    print(f" GPU : {props.name} ({props.total_memory/1024**3:.1f} GB)")
else:
    print(" GPU: None")
print(f" RAM : {psutil.virtual_memory().total/1024**3:.1f} GB")
print(f" CPUs : {os.cpu_count()}")
PY

# Verify files exist
echo ""
echo " Checking required files..."
missing=0
for f in sft_train.py sft_config.yaml Base/Datasets/sft_clean/train.json; do
  if [[ -f "$f" ]]; then
    echo " ✓ $f"
  else
    echo " ✗ $f MISSING!"
    missing=1
  fi
done
echo " (Pretrained checkpoint will be auto-downloaded from HuggingFace if not present)"

# Stop here rather than launching a training run that cannot succeed.
((missing == 0)) || die "required files are missing in $(pwd); aborting before training"

# ── 4. Train SFT ─────────────────────────────────────────────────────────────
echo ""
echo "[4/4] Starting SFT training..."
echo ""

python sft_train.py \
  --config sft_config.yaml

echo ""
echo "=========================================="
echo " SFT complete! Output: Base/out/sft/luna_100m_sft"
echo "=========================================="