#!/usr/bin/env bash
# One-command recommended training run for a single RTX 6000 Ada 48/50GB server.
#
# Usage:
#   export HF_TOKEN=...                                  # required (gated model/dataset pulls)
#   export TRACKIO_SPACE_ID=nraptisss/tmf921-trackio     # optional
#   bash scripts/run_rtx6000ada.sh
#
# NOTE(review): requirements.txt, configs/ and outputs/ paths are relative —
# this script assumes it is run from the repository root. TODO confirm.
set -euo pipefail

# The usage above documents HF_TOKEN as required; warn early rather than
# failing deep inside training when the Hub rejects an anonymous download.
# (Warning, not a hard error: users with cached Hub credentials may be fine.)
if [[ -z "${HF_TOKEN:-}" ]]; then
  printf 'warning: HF_TOKEN is not set; gated model/dataset downloads may fail\n' >&2
fi

# Install/refresh Python dependencies.
python -m pip install -U pip
python -m pip install -r requirements.txt
# Optional throughput improvement. Uncomment only if compatible with your CUDA/PyTorch build.
# python -m pip install flash-attn --no-build-isolation

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"  # default to the first GPU
export TOKENIZERS_PARALLELISM=false                       # silence HF tokenizers fork warning
export PYTHONPATH="$PWD/src:${PYTHONPATH:-}"              # make the repo's src/ importable

# QLoRA fine-tune, then 4-bit evaluation of the resulting adapter.
python scripts/train_qlora.py \
  --config configs/rtx6000ada_qwen3_8b_qlora.yaml

python scripts/evaluate_model.py \
  --model Qwen/Qwen3-8B \
  --adapter outputs/qwen3-8b-tmf921-qlora \
  --dataset nraptisss/TMF921-intent-to-config-research-sota \
  --output_dir outputs/qwen3-8b-tmf921-qlora/eval \
  --load_in_4bit