version: "3.8"
# Optimized for MacBook M2 Pro Max 96GB with Gemma 4 support
# - MLX local inference runs natively on Metal (host, not in Docker)
# - Gemma 4 31B-BF16 fits comfortably in 96GB unified memory
# - Redis, Postgres, API server in lightweight containers
# - NIM → Cloudflare → Gemini → MLX fallback chain
# - Everything runs natively on Apple Silicon
services:
api:
build:
context: .
dockerfile: Dockerfile.prod
ports:
- "8000:8000"
environment:
- PORT=8000
- WORKERS=2
- REDIS_URL=redis://redis:6379
- DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
- MAX_CONCURRENT_REQUESTS=100
- DEFAULT_RPM_LIMIT=40
- REQUEST_TIMEOUT=120
- CACHE_TTL_SECONDS=300
- BUDGET_USD_PER_SESSION=10.0
- CIRCUIT_BREAKER_FAILURE_THRESHOLD=3
- CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30
# Fallback chain: NIM → Cloudflare → Gemini → MLX
- FALLBACK_ENABLED=true
- FALLBACK_PRIMARY=nim
- FALLBACK_SECONDARY=cloudflare
- FALLBACK_TERTIARY=gemini
# NIM primary (cloud GPU)
- NIM_API_BASE=https://integrate.api.nvidia.com/v1
- NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
# Cloudflare secondary fallback
- CLOUDFLARE_API_KEY=${CLOUDFLARE_API_KEY:-}
- CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID:-}
# Google Gemini tertiary fallback
- GEMINI_API_KEY=${GEMINI_API_KEY:-}
- GEMINI_API_BASE=https://generativelanguage.googleapis.com/v1beta/openai
# MLX local (runs on host Metal, not in container)
- MLX_ENABLED=${MLX_ENABLED:-false}
- MLX_API_BASE=http://host.docker.internal:8000/v1
- LOG_LEVEL=INFO
depends_on:
redis:
condition: service_healthy
postgres:
condition: service_healthy
networks:
- ml_intern_network
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 40s
restart: unless-stopped
worker:
build:
context: .
dockerfile: Dockerfile.prod
command: ["python", "-m", "production.worker"]
environment:
- REDIS_URL=redis://redis:6379
- DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
- LOG_LEVEL=INFO
depends_on:
redis:
condition: service_healthy
postgres:
condition: service_healthy
networks:
- ml_intern_network
restart: unless-stopped
redis:
image: redis:7-alpine
ports:
- "127.0.0.1:6379:6379"
volumes:
- redis_data:/data
command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru
networks:
- ml_intern_network
healthcheck:
test: ["CMD", "redis-cli", "ping"]
interval: 10s
timeout: 3s
retries: 3
restart: unless-stopped
postgres:
image: postgres:16-alpine
ports:
- "127.0.0.1:5432:5432"
environment:
- POSTGRES_USER=ml_intern
- POSTGRES_PASSWORD=ml_intern
- POSTGRES_DB=ml_intern
volumes:
- postgres_data:/var/lib/postgresql/data
- ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
networks:
- ml_intern_network
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ml_intern"]
interval: 10s
timeout: 3s
retries: 5
restart: unless-stopped
nginx:
image: nginx:alpine
ports:
- "80:80"
volumes:
- ./nginx.conf:/etc/nginx/nginx.conf:ro
depends_on:
- api
networks:
- ml_intern_network
restart: unless-stopped
volumes:
  # Named volumes so redis AOF data and postgres data survive container
  # recreation (removed only via `docker compose down -v`).
  redis_data:
  postgres_data:
networks:
  # Single bridge network shared by all services; containers resolve each
  # other by service name (redis, postgres, api).
  ml_intern_network:
    driver: bridge