# QModel Docker Compose Configuration
# ====================================
# Configure via .env file:
#   LLM_BACKEND=ollama  (default: local Ollama on host machine)
#   LLM_BACKEND=hf      (HuggingFace backend)
#
# Usage:
#   docker-compose up        # Uses backend from .env
#   docker-compose up -d     # Run in background
#   docker-compose logs -f   # View logs
#   docker-compose down      # Stop services
version: "3.8"

services:
  qmodel:
    build: .
    container_name: qmodel-api
    ports:
      - "8000:8000"
    env_file:
      - .env
    environment:
      # Pass through HF token if using HuggingFace backend
      - HF_TOKEN=${HF_TOKEN:-}
      # Ollama host: use Docker host IP for local Ollama
      - OLLAMA_HOST=${OLLAMA_HOST:-http://host.docker.internal:11434}
    volumes:
      # Mount current directory for live code changes (development)
      - .:/app
      # Cache HuggingFace models to avoid re-downloading
      - huggingface_cache:/root/.cache/huggingface
    # Restart on non-zero exit only, at most 3 attempts
    restart: on-failure:3
    extra_hosts:
      # Map host.docker.internal to the host gateway so the container can
      # reach a host-local Ollama on native Linux engines (Docker Desktop
      # on Mac/Windows already provides this hostname automatically)
      - "host.docker.internal:host-gateway"
    networks:
      - qmodel-network
    # Health check for orchestration (60s start_period allows model load)
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 60s

networks:
  qmodel-network:
    driver: bridge

volumes:
  # Persistent named volume caching HuggingFace model downloads
  huggingface_cache: