# syntax=docker/dockerfile:1

# Modern PyTorch base image that already ships CUDA 12.1 + cuDNN 9,
# so we don't have to install the GPU toolchain ourselves.
FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel

# Install the serving dependencies.
# BUG FIX: the version specifier MUST be quoted. `RUN` uses /bin/sh, and an
# unquoted `transformers>=5.1.0` is parsed as `pip install … transformers`
# with stdout redirected to a file literally named `=5.1.0` — the version
# constraint was being silently dropped.
# NOTE(review): the remaining packages are unpinned; consider pinning exact
# versions (or using a requirements.txt) for reproducible builds.
RUN pip install --no-cache-dir \
      "transformers>=5.1.0" \
      accelerate \
      fastapi \
      pillow \
      python-multipart \
      uvicorn

# Copy the model/handler files into the image AFTER installing dependencies,
# so editing source code does not invalidate the cached pip layer.
# NOTE(review): add a .dockerignore (.git, checkpoints, caches) to keep the
# build context small and avoid leaking local files into the image.
COPY . /repository
WORKDIR /repository

# Documentation only (does not publish the port): Hugging Face Inference
# Endpoints route traffic to port 80 inside the container.
EXPOSE 80

# Exec-form CMD so uvicorn runs as PID 1 and receives SIGTERM directly
# on `docker stop` instead of being wrapped in `/bin/sh -c`.
# NOTE(review): the container runs as root; a non-root USER is preferable,
# but binding port 80 as non-root fails on runtimes that don't lower
# ip_unprivileged_port_start — confirm the target platform before changing.
CMD ["uvicorn", "handler:app", "--host", "0.0.0.0", "--port", "80"]