File size: 934 Bytes
#!/usr/bin/env bash
set -euo pipefail

# One-command recommended training run for a single RTX 6000 Ada 48/50GB server.
# Steps: upgrade pip, install requirements.txt, run scripts/train_qlora.py with
# the RTX 6000 Ada config, then run scripts/evaluate_model.py on the adapter.
#
# Usage (run from the repository root; every path below is relative to it):
#   export HF_TOKEN=...
#   export TRACKIO_SPACE_ID=nraptisss/tmf921-trackio  # optional
#   bash scripts/run_rtx6000ada.sh

# Print an error message to stderr and abort the run.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Fail fast, before pip modifies the environment, when the script is launched
# from a directory that does not contain the files it is about to use.
required_files=(
  requirements.txt
  scripts/train_qlora.py
  scripts/evaluate_model.py
  configs/rtx6000ada_qwen3_8b_qlora.yaml
)
for required_file in "${required_files[@]}"; do
  [[ -f "${required_file}" ]] \
    || die "missing ${required_file}; run this script from the repository root"
done

python -m pip install -U pip
python -m pip install -r requirements.txt

# Optional throughput improvement. Uncomment only if compatible with your CUDA/PyTorch build.
# python -m pip install flash-attn --no-build-isolation

# Use GPU 0 unless the caller already exported a non-empty device list.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
export TOKENIZERS_PARALLELISM=false
# Append a pre-existing PYTHONPATH only when it is non-empty: a trailing ':'
# would leave an empty entry, which Python treats as the current directory.
export PYTHONPATH="${PWD}/src${PYTHONPATH:+:${PYTHONPATH}}"

# Adapter location passed to the evaluation step; its results go in an eval/
# subdirectory of the same path.
# NOTE(review): presumably this matches the output directory set in the
# training config — confirm against configs/rtx6000ada_qwen3_8b_qlora.yaml.
readonly adapter_dir="outputs/qwen3-8b-tmf921-qlora"

python scripts/train_qlora.py \
  --config configs/rtx6000ada_qwen3_8b_qlora.yaml

python scripts/evaluate_model.py \
  --model Qwen/Qwen3-8B \
  --adapter "${adapter_dir}" \
  --dataset nraptisss/TMF921-intent-to-config-research-sota \
  --output_dir "${adapter_dir}/eval" \
  --load_in_4bit
|