Upload run.sh
Browse files
run.sh
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# ============================================================
|
| 3 |
+
# TMF921 Intent Translation Training — One-Command Runner
|
| 4 |
+
# ============================================================
|
| 5 |
+
# Run on a machine with an NVIDIA GPU (tested on RTX 6000 Ada 50GB)
|
| 6 |
+
#
|
| 7 |
+
# Usage:
|
| 8 |
+
# git clone https://huggingface.co/nraptisss/intent-translation-training
|
| 9 |
+
# cd intent-translation-training
|
| 10 |
+
# chmod +x run.sh
|
| 11 |
+
# ./run.sh # train + evaluate with defaults
|
| 12 |
+
# ./run.sh --eval-only # evaluate an existing adapter
|
| 13 |
+
# ============================================================
|
| 14 |
+
|
| 15 |
+
# Strict mode: abort on any failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# ── Configuration ────────────────────────────────────────────
# Every setting below is fixed for the whole run, so each one is declared
# readonly: an accidental reassignment later in the script becomes an error
# instead of silently changing what gets trained or evaluated.
readonly BASE_MODEL="Qwen/Qwen3-8B"                              # --base_model (train + eval)
readonly DATASET="nraptisss/TMF921-intent-to-config-augmented"   # --dataset (train + eval)
readonly OUTPUT_DIR="./output"                                   # --output_dir / --adapter_path
# When left empty, no --push_to_hub / --hub_model_id flags are passed to train.py.
readonly HUB_MODEL_ID=""  # e.g. "nraptisss/Qwen3-8B-TMF921-Intent-QLora"

# Training hyperparameters (forwarded verbatim to train.py)
readonly EPOCHS=3            # --epochs
readonly LR=1e-4             # --lr
readonly BATCH_SIZE=4        # --batch_size
readonly GRAD_ACCUM=8        # --grad_accum
readonly MAX_SEQ_LEN=4096    # --max_seq_length
readonly LORA_R=32           # --lora_r
readonly LORA_ALPHA=64       # --lora_alpha

# Eval (forwarded to evaluate.py as --num_samples)
readonly EVAL_SAMPLES=200  # -1 for full test set
# ─────────────────────────────────────────────────────────────
|
| 35 |
+
|
| 36 |
+
# Startup banner, drawn with UTF-8 box-drawing characters (the script must
# be saved as UTF-8 and shown on a UTF-8 terminal to render correctly).
# printf '%s\n' emits each argument on its own line without interpreting
# anything inside the strings.
printf '%s\n' \
  "╔════════════════════════════════════════════════════════════╗" \
  "║       TMF921 Intent Translation — Training Pipeline        ║" \
  "╚════════════════════════════════════════════════════════════╝"
|
| 39 |
+
|
| 40 |
+
# Check CUDA: print the name and total memory of each GPU that nvidia-smi
# reports. If nvidia-smi is missing or exits non-zero, stop the run here.
echo ""
echo "Checking GPU …"
if ! nvidia-smi --query-gpu=name,memory.total --format=csv,noheader; then
  # Diagnostics go to stderr so they are not lost when stdout is redirected.
  echo "ERROR: No NVIDIA GPU found. Exiting." >&2
  exit 1
fi
|
| 47 |
+
|
| 48 |
+
# Install dependencies
echo ""
echo "Installing dependencies …"
# Invoke pip through `python -m pip` so the packages land in the same
# interpreter that runs train.py / evaluate.py below; a bare `pip` on PATH
# can belong to a different Python installation.
# Only the last five lines of pip's combined stdout/stderr are shown. A pip
# failure still aborts the script because `set -o pipefail` is enabled at
# the top of the file.
python -m pip install -r requirements.txt 2>&1 | tail -n 5
|
| 52 |
+
|
| 53 |
+
# Login to HF (if token is set)
# ${HF_TOKEN:-} expands to an empty string when the variable is unset, so
# this test is safe under `set -u`.
if [[ -n "${HF_TOKEN:-}" ]]; then
  echo ""
  echo "Logging in to Hugging Face …"
  # NOTE(review): the token is passed as a command-line argument, which makes
  # it visible in the process list and in `set -x` traces while the command
  # runs. Confirm this is acceptable on shared machines.
  huggingface-cli login --token "$HF_TOKEN" --add-to-git-credential
fi
|
| 59 |
+
|
| 60 |
+
# ── Training ─────────────────────────────────────────────────
# Runs train.py with the settings from the Configuration section.
# Skipped entirely when the first script argument is --eval-only.
if [[ "${1:-}" != "--eval-only" ]]; then
  echo ""
  echo "Starting training …"

  # Assemble the argument list as an array: each value stays exactly one
  # word no matter what characters it contains, and optional flags can be
  # appended without relying on word-splitting of an unquoted expansion.
  train_args=(
    --base_model "$BASE_MODEL"
    --dataset "$DATASET"
    --output_dir "$OUTPUT_DIR"
    --epochs "$EPOCHS"
    --lr "$LR"
    --batch_size "$BATCH_SIZE"
    --grad_accum "$GRAD_ACCUM"
    --max_seq_length "$MAX_SEQ_LEN"
    --lora_r "$LORA_R"
    --lora_alpha "$LORA_ALPHA"
  )

  # Push to the Hub only when a target repository id has been configured.
  if [[ -n "$HUB_MODEL_ID" ]]; then
    train_args+=(--push_to_hub --hub_model_id "$HUB_MODEL_ID")
  fi

  python train.py "${train_args[@]}"
fi
|
| 77 |
+
|
| 78 |
+
# ── Evaluation ───────────────────────────────────────────────
# Always runs, both after training and in --eval-only mode. evaluate.py is
# pointed at the adapter in OUTPUT_DIR and told to write its results to
# OUTPUT_DIR/eval_results.json.
echo ""
echo "Starting evaluation …"

# Array form keeps every value a single, correctly quoted argument.
eval_args=(
  --base_model "$BASE_MODEL"
  --adapter_path "$OUTPUT_DIR"
  --dataset "$DATASET"
  --num_samples "$EVAL_SAMPLES"
  --output_file "${OUTPUT_DIR}/eval_results.json"
)

python evaluate.py "${eval_args[@]}"
|
| 87 |
+
|
| 88 |
+
# Completion banner: reports where the adapter and the evaluation metrics
# were written. Drawn with UTF-8 box-drawing characters; the right-hand
# border is aligned for the default OUTPUT_DIR of "./output".
echo ""
printf '%s\n' \
  "╔════════════════════════════════════════════════════════════╗" \
  "║  ✅ Pipeline complete!                                     ║" \
  "║  Model:   ${OUTPUT_DIR}                                         ║" \
  "║  Metrics: ${OUTPUT_DIR}/eval_results.json                       ║" \
  "╚════════════════════════════════════════════════════════════╝"
|