# QModel Docker Compose Configuration
# ====================================
# Configure via .env file:
#   LLM_BACKEND=ollama   (default: local Ollama on host machine)
#   LLM_BACKEND=hf       (HuggingFace backend)
#
# Usage:
#   docker-compose up                    # Uses backend from .env
#   docker-compose up -d                 # Run in background
#   docker-compose logs -f               # View logs
#   docker-compose down                  # Stop services

# Compose file format version. Legacy docker-compose (v1) uses it to select
# the 3.x schema. NOTE(review): Compose v2 reportedly treats this key as
# informational only and may warn that it is obsolete — confirm which CLI
# the project targets before removing it.
version: "3.8"

services:
  # QModel API container, built from the build context in this directory.
  qmodel:
    container_name: qmodel-api
    build:
      context: .
    # Retry a failed (non-zero exit) container at most 3 times; clean exits
    # are not restarted.
    restart: "on-failure:3"
    ports:
      # host:container
      - "8000:8000"
    networks:
      - qmodel-network
    extra_hosts:
      # Map host.docker.internal to the host gateway so the default
      # OLLAMA_HOST below resolves from inside the container. Docker Desktop
      # (Mac/Windows) provides this name by itself; Linux engines need this
      # explicit entry.
      - "host.docker.internal:host-gateway"
    # Base settings (e.g. LLM_BACKEND) are loaded from .env; keys listed under
    # `environment` take precedence over same-named keys from env_file.
    env_file:
      - .env
    environment:
      # HuggingFace token for the hf backend; empty string when unset.
      HF_TOKEN: "${HF_TOKEN:-}"
      # Ollama endpoint; defaults to an Ollama server running on the Docker host.
      OLLAMA_HOST: "${OLLAMA_HOST:-http://host.docker.internal:11434}"
    volumes:
      # Development: bind-mount the project directory over /app so code
      # changes on the host are visible in the container.
      - .:/app
      # Keep downloaded HuggingFace models in a named volume so they are not
      # re-downloaded when the container is re-created.
      - huggingface_cache:/root/.cache/huggingface
    # Liveness probe against the API's /health endpoint.
    # NOTE(review): requires curl inside the image — confirm the Dockerfile
    # installs it, otherwise the container will always report unhealthy.
    healthcheck:
      test:
        - "CMD"
        - "curl"
        - "-f"
        - "http://localhost:8000/health"
      # Allow 60s for startup before failed probes count against `retries`.
      start_period: 60s
      interval: 30s
      timeout: 10s
      retries: 3

networks:
  # Dedicated bridge network that the qmodel service attaches to.
  qmodel-network:
    driver: bridge

volumes:
  # Named volume backing /root/.cache/huggingface in the qmodel service, so
  # downloaded models persist across container re-creation. The empty mapping
  # means default driver and options.
  huggingface_cache: {}