Upload production/docker-compose.m2.yml
Browse files
production/docker-compose.m2.yml
CHANGED
|
@@ -1,9 +1,10 @@
|
|
| 1 |
version: "3.8"
|
| 2 |
|
| 3 |
-
# Optimized for MacBook M2 Pro Max 96GB
|
| 4 |
# - MLX local inference runs natively on Metal (host, not in Docker)
|
|
|
|
| 5 |
# - Redis, Postgres, API server in lightweight containers
|
| 6 |
-
# - NIM
|
| 7 |
# - Everything runs natively on Apple Silicon
|
| 8 |
|
| 9 |
services:
|
|
@@ -25,18 +26,22 @@ services:
|
|
| 25 |
- BUDGET_USD_PER_SESSION=10.0
|
| 26 |
- CIRCUIT_BREAKER_FAILURE_THRESHOLD=3
|
| 27 |
- CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# NIM primary (cloud GPU)
|
| 29 |
- NIM_API_BASE=https://integrate.api.nvidia.com/v1
|
| 30 |
- NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
|
| 31 |
# Cloudflare secondary fallback
|
| 32 |
- CLOUDFLARE_API_KEY=${CLOUDFLARE_API_KEY:-}
|
| 33 |
- CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID:-}
|
| 34 |
-
#
|
| 35 |
-
-
|
| 36 |
-
-
|
| 37 |
-
- FALLBACK_SECONDARY=cloudflare
|
| 38 |
# MLX local (runs on host Metal, not in container)
|
| 39 |
-
- MLX_ENABLED=false
|
| 40 |
- MLX_API_BASE=http://host.docker.internal:8000/v1
|
| 41 |
- LOG_LEVEL=INFO
|
| 42 |
depends_on:
|
|
|
|
| 1 |
version: "3.8"
|
| 2 |
|
| 3 |
+
# Optimized for MacBook Pro (M2 Max, 96GB) with Gemma 4 support
|
| 4 |
# - MLX local inference runs natively on Metal (host, not in Docker)
|
| 5 |
+
# - Gemma 4 31B-BF16 fits comfortably in 96GB unified memory
|
| 6 |
# - Redis, Postgres, API server in lightweight containers
|
| 7 |
+
# - NIM → Cloudflare → Gemini → MLX fallback chain
|
| 8 |
# - Everything runs natively on Apple Silicon
|
| 9 |
|
| 10 |
services:
|
|
|
|
| 26 |
- BUDGET_USD_PER_SESSION=10.0
|
| 27 |
- CIRCUIT_BREAKER_FAILURE_THRESHOLD=3
|
| 28 |
- CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30
|
| 29 |
+
# Fallback chain: NIM → Cloudflare → Gemini → MLX (MLX_ENABLED defaults to false)
|
| 30 |
+
- FALLBACK_ENABLED=true
|
| 31 |
+
- FALLBACK_PRIMARY=nim
|
| 32 |
+
- FALLBACK_SECONDARY=cloudflare
|
| 33 |
+
- FALLBACK_TERTIARY=gemini
|
| 34 |
# NIM primary (cloud GPU)
|
| 35 |
- NIM_API_BASE=https://integrate.api.nvidia.com/v1
|
| 36 |
- NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
|
| 37 |
# Cloudflare secondary fallback
|
| 38 |
- CLOUDFLARE_API_KEY=${CLOUDFLARE_API_KEY:-}
|
| 39 |
- CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID:-}
|
| 40 |
+
# Google Gemini tertiary fallback
|
| 41 |
+
- GEMINI_API_KEY=${GEMINI_API_KEY:-}
|
| 42 |
+
- GEMINI_API_BASE=https://generativelanguage.googleapis.com/v1beta/openai
|
|
|
|
| 43 |
# MLX local (runs on host Metal, not in container)
|
| 44 |
+
- MLX_ENABLED=${MLX_ENABLED:-false}
|
| 45 |
- MLX_API_BASE=http://host.docker.internal:8000/v1
|
| 46 |
- LOG_LEVEL=INFO
|
| 47 |
depends_on:
|