---
# QModel Docker Compose Configuration
# ====================================
# Configure via .env file:
#   LLM_BACKEND=ollama   (default: local Ollama on host machine)
#   LLM_BACKEND=hf       (HuggingFace backend)
#
# Usage:
#   docker-compose up        # Uses backend from .env
#   docker-compose up -d     # Run in background
#   docker-compose logs -f   # View logs
#   docker-compose down      # Stop services

version: "3.8"

services:
  qmodel:
    build: .
    container_name: qmodel-api
    ports:
      # Quoted so the mapping is always read as a string, never a number.
      - "8000:8000"
    env_file:
      - .env
    environment:
      # Pass through HF token if using HuggingFace backend
      # (defaults to empty when HF_TOKEN is unset).
      - HF_TOKEN=${HF_TOKEN:-}
      # Ollama host: defaults to the Ollama instance running on the Docker
      # host; set OLLAMA_HOST to point elsewhere.
      - OLLAMA_HOST=${OLLAMA_HOST:-http://host.docker.internal:11434}
    volumes:
      # Mount current directory for live code changes (development)
      - .:/app
      # Cache HuggingFace models to avoid re-downloading
      - huggingface_cache:/root/.cache/huggingface
    # Restart only when the container exits with a non-zero status,
    # giving up after 3 attempts.
    restart: on-failure:3
    extra_hosts:
      # Map host.docker.internal to the host gateway so the container can
      # reach services on the host (e.g. the default OLLAMA_HOST above).
      # Needed on Linux engines; Docker Desktop (Mac/Windows) resolves this
      # name on its own.
      - "host.docker.internal:host-gateway"
    networks:
      - qmodel-network
    # Health check for orchestration: polls the API's /health endpoint.
    # NOTE(review): requires curl inside the image - confirm the Dockerfile
    # installs it, otherwise the container is always reported unhealthy.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Grace period before failed probes count against `retries`.
      start_period: 60s

networks:
  qmodel-network:
    driver: bridge

volumes:
  # Persistent cache for HuggingFace models
  huggingface_cache: