set -euo pipefail

# Install CUDA-enabled PyTorch and the training stack for RTX 6000 Ada.
# Run this instead of plain `pip install -r requirements.txt` if your server
# accidentally installed CPU-only torch.
#
# Optional environment overrides (the defaults reproduce the stock behaviour):
#   PYTHON           interpreter whose environment is installed into (default: python)
#   TORCH_INDEX_URL  PyTorch wheel index to install from (default: the CUDA 12.4 index)
PYTHON="${PYTHON:-python}"
TORCH_INDEX_URL="${TORCH_INDEX_URL:-https://download.pytorch.org/whl/cu124}"

# Bring the packaging tools up to date before installing anything else.
"$PYTHON" -m pip install -U pip setuptools wheel

# CUDA 12.4 PyTorch wheels work on recent NVIDIA drivers. If your driver only
# supports older CUDA, set TORCH_INDEX_URL to the matching PyTorch index from
# https://pytorch.org/get-started/locally/ .
"$PYTHON" -m pip install --upgrade --index-url "$TORCH_INDEX_URL" \
    torch torchvision torchaudio

# Install the rest. The CUDA torch wheel above already satisfies torch>=2.6, so
# pip should not replace it: torch is deliberately not named on this command
# line, and pip only upgrades unnamed dependencies when a requirement forces it.
"$PYTHON" -m pip install --upgrade \
    transformers 'trl[peft]' peft accelerate datasets bitsandbytes safetensors huggingface_hub trackio \
    pandas numpy tqdm jsonschema scikit-learn pyyaml rich

# Sanity check: print the interpreter/torch/CUDA versions and exit non-zero
# (which aborts the script under `set -e`) when no CUDA device is usable.
# The quoted 'PY' delimiter stops the shell from expanding anything in the body.
"$PYTHON" - <<'PY'
import sys
import torch
print('python', sys.version)
print('torch', torch.__version__)
print('torch.version.cuda', torch.version.cuda)
print('cuda_available', torch.cuda.is_available())
if not torch.cuda.is_available():
    raise SystemExit('ERROR: CUDA is not available. Check NVIDIA driver, CUDA_VISIBLE_DEVICES, and PyTorch CUDA wheel.')
print('gpu_count', torch.cuda.device_count())
print('gpu_name', torch.cuda.get_device_name(0))
PY