# Core
torch>=2.0.0
transformers>=4.35.0
datasets>=2.14.0
accelerate>=0.24.0

# Metrics
scikit-learn>=1.3.0
scipy>=1.11.0

# Optional but recommended
trackio>=0.1.0

# FlashAttention (requires CUDA)
# flash-attn>=2.3.0

# For longer context / efficiency
# xformers>=0.0.22