#!/usr/bin/env bash
# One-command recommended training run for a single RTX 6000 Ada 48/50GB server.
#
# Usage:
#   export HF_TOKEN=...                                  # required (gated model/dataset pulls)
#   export TRACKIO_SPACE_ID=nraptisss/tmf921-trackio     # optional
#   bash scripts/run_rtx6000ada.sh
#
# NOTE(review): requirements.txt, configs/ and outputs/ paths are relative —
# this script assumes it is run from the repository root. TODO confirm.
set -euo pipefail

# The usage above documents HF_TOKEN as required; warn early rather than
# failing deep inside training when the Hub rejects an anonymous download.
# (Warning, not a hard error: users with cached Hub credentials may be fine.)
if [[ -z "${HF_TOKEN:-}" ]]; then
  printf 'warning: HF_TOKEN is not set; gated model/dataset downloads may fail\n' >&2
fi

# Install/refresh Python dependencies.
python -m pip install -U pip
python -m pip install -r requirements.txt
# Optional throughput improvement. Uncomment only if compatible with your CUDA/PyTorch build.
# python -m pip install flash-attn --no-build-isolation

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"  # default to the first GPU
export TOKENIZERS_PARALLELISM=false                       # silence HF tokenizers fork warning
export PYTHONPATH="$PWD/src:${PYTHONPATH:-}"              # make the repo's src/ importable

# QLoRA fine-tune, then 4-bit evaluation of the resulting adapter.
python scripts/train_qlora.py \
  --config configs/rtx6000ada_qwen3_8b_qlora.yaml

python scripts/evaluate_model.py \
  --model Qwen/Qwen3-8B \
  --adapter outputs/qwen3-8b-tmf921-qlora \
  --dataset nraptisss/TMF921-intent-to-config-research-sota \
  --output_dir outputs/qwen3-8b-tmf921-qlora/eval \
  --load_in_4bit