PEFT
qlora
sft
trl
qwen3
tmf921
intent-based-networking
network-slicing
rtx-6000-ada
ml-intern
tmf921-intent-training / scripts /run_rtx6000ada.sh
nraptisss's picture
Add RTX 6000 Ada QLoRA training and evaluation repo
d9ba941 verified
raw
history blame contribute delete
934 Bytes
#!/usr/bin/env bash
set -euo pipefail

# One-command recommended training run for a single RTX 6000 Ada 48/50GB server.
#
# Steps: install the Python requirements, run QLoRA training with the
# RTX 6000 Ada config, then evaluate the resulting adapter on the base model.
#
# Usage:
#   export HF_TOKEN=...
#   export TRACKIO_SPACE_ID=nraptisss/tmf921-trackio  # optional
#   bash scripts/run_rtx6000ada.sh
#
# Environment handled by this script:
#   CUDA_VISIBLE_DEVICES  GPU selection; defaults to 0 when unset or empty.
#   PYTHONPATH            Existing entries are preserved after <repo>/src.

# Print an error message to stderr and abort.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Every path below is relative to the repository root. When the script's own
# location is known (i.e. it was run as a file rather than piped via stdin),
# switch to its parent directory's parent so the script works from any cwd.
script_path=${BASH_SOURCE[0]:-}
if [[ -n "${script_path}" && -f "${script_path}" ]]; then
  repo_root=$(cd -- "$(dirname -- "${script_path}")/.." && pwd)
  cd -- "${repo_root}"
fi

readonly config_file="configs/rtx6000ada_qwen3_8b_qlora.yaml"
# NOTE(review): presumably this matches the output directory set in the YAML
# config above -- confirm if the config is changed.
readonly adapter_dir="outputs/qwen3-8b-tmf921-qlora"

# Fail fast, before touching the Python environment, if we are not in the repo.
[[ -f requirements.txt ]] || die "requirements.txt not found in ${PWD}; run from the repository root"
[[ -f "${config_file}" ]] || die "${config_file} not found in ${PWD}"

python -m pip install -U pip
python -m pip install -r requirements.txt

# Optional throughput improvement. Uncomment only if compatible with your CUDA/PyTorch build.
# python -m pip install flash-attn --no-build-isolation

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
export TOKENIZERS_PARALLELISM=false
# Append the caller's PYTHONPATH only when it is non-empty: a trailing ':' would
# leave an empty entry, which Python interprets as the current directory.
export PYTHONPATH="${PWD}/src${PYTHONPATH:+:${PYTHONPATH}}"

python scripts/train_qlora.py \
  --config "${config_file}"

python scripts/evaluate_model.py \
  --model Qwen/Qwen3-8B \
  --adapter "${adapter_dir}" \
  --dataset nraptisss/TMF921-intent-to-config-research-sota \
  --output_dir "${adapter_dir}/eval" \
  --load_in_4bit