# Python dependencies for this project, in pip requirements-file format.
# One requirement specifier per line; lines starting with '#' are comments.
# Install with: pip install -r <this file>

# Core dependencies
torch>=2.2.0
transformers>=4.40.0
accelerate>=0.30.0
datasets>=2.18.0
tokenizers>=0.19.0

# Quantization
bitsandbytes>=0.43.0

# Flash Attention (CUDA only)
# NOTE(review): flash-attn typically compiles against an already-installed
# torch/CUDA toolchain, so it may need to be installed in a separate step on
# some machines -- confirm against the target environment.
flash-attn>=2.5.0

# Scientific computing
numpy>=1.26.0
scipy>=1.12.0
scikit-learn>=1.4.0

# Chemistry/Biology
rdkit>=2023.9.0
pubchempy>=1.0.4

# Web scraping
arxiv>=2.1.0
beautifulsoup4>=4.12.0
requests>=2.31.0

# Data processing
pandas>=2.0.0
pyarrow>=14.0.0

# LaTeX parsing
pylatexenc>=2.10

# Deduplication
# NOTE(review): confirm that `minhash` is the intended PyPI distribution name
# and that a release satisfying >=0.1.0 exists.
minhash>=0.1.0

# Utilities
tqdm>=4.65.0
psutil>=5.9.0
jsonlines>=3.1.0

# Optional: wandb for logging
# wandb>=0.16.0

# Development/testing
pytest>=7.0.0
black>=23.0.0
flake8>=6.0.0
mypy>=1.0.0