# NOTE: the `version` key is obsolete in the Compose Specification (Compose v2
# ignores it with a warning); kept only for legacy docker-compose v1 tooling.
version: "3.8"

# Optimized for MacBook M2 Pro Max 96GB with Gemma 4 support
# - MLX local inference runs natively on Metal (host, not in Docker)
# - Gemma 4 31B-BF16 fits comfortably in 96GB unified memory
# - Redis, Postgres, API server in lightweight containers
# - NIM → Cloudflare → Gemini → MLX fallback chain
# - Everything runs natively on Apple Silicon

services:
  # FastAPI-style application server; waits for healthy redis+postgres.
  api:
    build:
      context: .
      dockerfile: Dockerfile.prod
    ports:
      - "8000:8000"
    environment:
      - PORT=8000
      - WORKERS=2
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
      - MAX_CONCURRENT_REQUESTS=100
      - DEFAULT_RPM_LIMIT=40
      - REQUEST_TIMEOUT=120
      - CACHE_TTL_SECONDS=300
      - BUDGET_USD_PER_SESSION=10.0
      - CIRCUIT_BREAKER_FAILURE_THRESHOLD=3
      - CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30
      # Fallback chain: NIM → Cloudflare → Gemini → MLX
      - FALLBACK_ENABLED=true
      - FALLBACK_PRIMARY=nim
      - FALLBACK_SECONDARY=cloudflare
      - FALLBACK_TERTIARY=gemini
      # NIM primary (cloud GPU)
      - NIM_API_BASE=https://integrate.api.nvidia.com/v1
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Cloudflare secondary fallback
      - CLOUDFLARE_API_KEY=${CLOUDFLARE_API_KEY:-}
      - CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID:-}
      # Google Gemini tertiary fallback
      - GEMINI_API_KEY=${GEMINI_API_KEY:-}
      - GEMINI_API_BASE=https://generativelanguage.googleapis.com/v1beta/openai
      # MLX local (runs on host Metal, not in container)
      - MLX_ENABLED=${MLX_ENABLED:-false}
      # NOTE(review): this targets host port 8000, which is also the port the
      # api container publishes — confirm the host MLX server is not meant to
      # listen on a different port (would collide with the 8000:8000 mapping).
      - MLX_API_BASE=http://host.docker.internal:8000/v1
      - LOG_LEVEL=INFO
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped

  # Background job consumer; same image as api, different entry point.
  worker:
    build:
      context: .
      dockerfile: Dockerfile.prod
    command: ["python", "-m", "production.worker"]
    environment:
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
      - LOG_LEVEL=INFO
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
    networks:
      - ml_intern_network
    restart: unless-stopped

  # Cache / queue backend; bound to loopback only so it is not exposed off-host.
  redis:
    image: redis:7-alpine
    ports:
      - "127.0.0.1:6379:6379"
    volumes:
      - redis_data:/data
    command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3
    restart: unless-stopped

  # Primary datastore; loopback-only port mapping, schema seeded from init.sql.
  postgres:
    image: postgres:16-alpine
    ports:
      - "127.0.0.1:5432:5432"
    environment:
      - POSTGRES_USER=ml_intern
      - POSTGRES_PASSWORD=ml_intern
      - POSTGRES_DB=ml_intern
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ml_intern"]
      interval: 10s
      timeout: 3s
      retries: 5
    restart: unless-stopped

  # Reverse proxy in front of the api service.
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - api
    networks:
      - ml_intern_network
    restart: unless-stopped

volumes:
  redis_data:
  postgres_data:

networks:
  ml_intern_network:
    driver: bridge