| # requirements.txt for Hugging Face Spaces with NVIDIA T4 GPU |
| # Core Framework Dependencies |
|
|
| # Note: gradio, fastapi, uvicorn, datasets, huggingface-hub, |
| # pydantic==2.10.6, and protobuf<4 are installed by the HF Spaces SDK, |
| # so they are intentionally not listed in this file. |
|
|
| # PyTorch with CUDA support (for GPU inference) |
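| # Note: HF Spaces provides torch; it is listed here to keep the installed |
| # version inside a known range for GPU inference. |
| # NOTE(review): a version range by itself does not select a CUDA build - |
| # confirm the wheel actually installed on the Space is CUDA-enabled. |
| # The upper bound avoids breaking changes in newer versions. |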
| torch>=2.0.0,<2.2.0 |
|
|
| # HTTP Clients |
| aiohttp>=3.9.0 |
| httpx>=0.25.0 |
|
|
| # Hugging Face Ecosystem |
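| # PINNED for Phi-3 and DynamicCache compatibility |
| # Lower bound 4.36.0: the release series that introduced the DynamicCache API. |
| # Upper bound <4.41.0: avoids breaking changes in newer releases. |
| # NOTE(review): native Phi-3 model classes appear to have shipped in transformers 4.41.0, |
| # so within this range Phi-3 presumably loads via trust_remote_code=True - confirm. |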
| transformers>=4.36.0,<4.41.0 |
| accelerate>=0.24.0,<0.28.0 |
| tokenizers>=0.15.0 |
| sentence-transformers>=2.2.0 |
|
|
| # Vector Database & Search |
| faiss-cpu>=1.7.4 |
| # Pin numpy to avoid compatibility issues with numpy 2.0 |
| numpy>=1.24.0,<2.0.0 |
| scipy>=1.11.0 |
|
|
| # Data Processing & Utilities |
| pandas>=2.1.0 |
| scikit-learn>=1.3.0 |
|
|
| # Database & Persistence |
| sqlalchemy>=2.0.0 |
| alembic>=1.12.0 |
|
|
| # Caching & Performance |
| cachetools>=5.3.0 |
| redis>=5.0.0 |
| python-multipart>=0.0.6 |
|
|
| # Security & Validation |
| pydantic-settings>=2.1.0 |
| python-dotenv>=1.0.0 # For secure .env file loading |
| python-jose[cryptography]>=3.3.0 |
| bcrypt>=4.0.0 |
|
|
| # Mobile Optimization & UI |
| cssutils>=2.7.0 |
| pillow>=10.1.0 |
| requests>=2.31.0 |
|
|
| # Async & Concurrency |
| aiofiles>=23.2.0 |
| concurrent-log-handler>=0.9.0 |
|
|
| # Logging & Monitoring |
| structlog>=23.2.0 |
| prometheus-client>=0.19.0 |
| psutil>=5.9.0 |
|
|
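| # Development & Testing |
| # NOTE(review): these tools are presumably not needed by the running Space; |
| # consider moving them to a separate dev requirements file - confirm. |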
| pytest>=7.4.0 |
| pytest-asyncio>=0.21.0 |
| pytest-cov>=4.1.0 |
| black>=23.11.0 |
| flake8>=6.1.0 |
| mypy>=1.7.0 |
|
|
| # Utility Libraries |
| python-dateutil>=2.8.0 |
| pytz>=2023.3 |
| tzdata>=2023.3 |
| ujson>=5.8.0 |
| orjson>=3.9.0 |
|
|
| # Flask API for external integrations |
| flask>=3.0.0 |
| flask-cors>=4.0.0 |
| flask-limiter>=3.5.0 # Rate limiting for API protection |
|
|
| # Production WSGI Server |
| gunicorn>=21.2.0 # Production WSGI server (replaces Flask dev server) |
|
|
| # HF Spaces Specific Dependencies |
| # Note: huggingface-cli is part of huggingface-hub (installed by SDK) |
| gradio-client>=0.8.0 |
| gradio-pdf>=0.0.6 |
|
|
| # Model-specific dependencies |
| safetensors>=0.4.0 |
| # Pin bitsandbytes to avoid breaking changes with quantization |
| bitsandbytes>=0.41.0,<0.43.0 # Required for 4-bit and 8-bit quantization on GPU |
|
|
| # Optional: Flash Attention (for better performance with transformer models) |
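| # Uncomment the line below to enable it (requires CUDA 11.8+ and a compatible GPU). |
| # Note: Improves performance but adds build complexity. |
| # NOTE(review): flash-attn 2.x lists Ampere or newer GPUs as supported; the T4 (Turing) |
| # this file targets is presumably not among them - confirm before enabling. |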
| # flash-attn>=2.3.0 # Optional - improves performance but requires CUDA 11.8+ |
|
|
| # Development/debugging |
| ipython>=8.17.0 |
| ipdb>=0.13.0 |
| debugpy>=1.7.0 |
|
|
| # Security Tools (for security audits) |
| bandit>=1.7.5 # Security linter for Python code |
| safety>=2.3.5 # Dependency vulnerability scanner |
|
|
| # HTTP Client for ZeroGPU Chat API (aiohttp already included above) |
| # Note: No OpenAI client needed - using direct HTTP requests |
| |