# syntax=docker/dockerfile:1
FROM python:3.12-slim

# Non-root runtime user (uid 1000 is the Hugging Face Spaces convention).
# Created before the model pre-download so the multi-GB weights layer is
# written with the right ownership — a later `chown -R` would duplicate it.
RUN useradd --create-home --uid 1000 appuser

WORKDIR /app
RUN chown appuser:appuser /app

# Install torch on its own: `--index-url` is a pip-command-level option, not a
# per-package one. In the original single command it redirected resolution of
# EVERY package (fastapi, uvicorn, …) to the PyTorch CPU index, which does not
# host them. Installing torch first from the CPU index also prevents
# transformers/accelerate from pulling the CUDA build below.
# TODO(review): pin exact versions for reproducibility (hadolint DL3013).
RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu

# Remaining dependencies from PyPI (sorted for diffability).
RUN pip install --no-cache-dir \
        accelerate \
        fastapi \
        mcp \
        openenv-core \
        sentencepiece \
        transformers \
        uvicorn

# Keep the Hugging Face cache inside /app so the non-root user owns it and the
# build-time download below is found at runtime.
ENV HF_HOME=/app/.cache/huggingface

USER appuser

# Pre-download model weights at build time (faster cold start).
RUN python -c "from transformers import AutoModelForCausalLM, AutoTokenizer; \
    AutoTokenizer.from_pretrained('Qwen/Qwen2.5-1.5B-Instruct'); \
    AutoModelForCausalLM.from_pretrained('Qwen/Qwen2.5-1.5B-Instruct')"

# Copy app code last — changes here don't invalidate the deps/weights layers.
# (Paths relative to HF Space repo root.)
COPY --chown=appuser:appuser server/ server/
COPY --chown=appuser:appuser models.py .
COPY --chown=appuser:appuser serve.py .
COPY --chown=appuser:appuser static/ static/

# Documentation only; 7860 is the port HF Spaces expects the app to bind.
EXPOSE 7860

# Exec form: serve.py is PID 1 and receives SIGTERM from `docker stop`.
CMD ["python", "serve.py"]