raazkumar committed on
Commit
091c7e0
·
verified ·
1 Parent(s): 30f7cdb

Upload production/docker-compose.m2.yml

Browse files
Files changed (1) hide show
  1. production/docker-compose.m2.yml +12 -7
production/docker-compose.m2.yml CHANGED
@@ -1,9 +1,10 @@
1
  version: "3.8"
2
 
3
- # Optimized for MacBook M2 Pro Max 96GB
4
  # - MLX local inference runs natively on Metal (host, not in Docker)
 
5
  # - Redis, Postgres, API server in lightweight containers
6
- # - NIM and Cloudflare as cloud fallback
7
  # - Everything runs natively on Apple Silicon
8
 
9
  services:
@@ -25,18 +26,22 @@ services:
25
  - BUDGET_USD_PER_SESSION=10.0
26
  - CIRCUIT_BREAKER_FAILURE_THRESHOLD=3
27
  - CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30
 
 
 
 
 
28
  # NIM primary (cloud GPU)
29
  - NIM_API_BASE=https://integrate.api.nvidia.com/v1
30
  - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
31
  # Cloudflare secondary fallback
32
  - CLOUDFLARE_API_KEY=${CLOUDFLARE_API_KEY:-}
33
  - CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID:-}
34
- # Fallback config
35
- - FALLBACK_ENABLED=true
36
- - FALLBACK_PRIMARY=nim
37
- - FALLBACK_SECONDARY=cloudflare
38
  # MLX local (runs on host Metal, not in container)
39
- - MLX_ENABLED=false
40
  - MLX_API_BASE=http://host.docker.internal:8000/v1
41
  - LOG_LEVEL=INFO
42
  depends_on:
 
1
  version: "3.8"
2
 
3
+ # Optimized for MacBook M2 Pro Max 96GB with Gemma 4 support
4
  # - MLX local inference runs natively on Metal (host, not in Docker)
5
+ # - Gemma 4 31B-BF16 fits comfortably in 96GB unified memory
6
  # - Redis, Postgres, API server in lightweight containers
7
+ # - NIM → Cloudflare → Gemini → MLX fallback chain
8
  # - Everything runs natively on Apple Silicon
9
 
10
  services:
 
26
  - BUDGET_USD_PER_SESSION=10.0
27
  - CIRCUIT_BREAKER_FAILURE_THRESHOLD=3
28
  - CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30
29
+ # Fallback chain: NIM → Cloudflare → Gemini → MLX
30
+ - FALLBACK_ENABLED=true
31
+ - FALLBACK_PRIMARY=nim
32
+ - FALLBACK_SECONDARY=cloudflare
33
+ - FALLBACK_TERTIARY=gemini
34
  # NIM primary (cloud GPU)
35
  - NIM_API_BASE=https://integrate.api.nvidia.com/v1
36
  - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
37
  # Cloudflare secondary fallback
38
  - CLOUDFLARE_API_KEY=${CLOUDFLARE_API_KEY:-}
39
  - CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID:-}
40
+ # Google Gemini tertiary fallback
41
+ - GEMINI_API_KEY=${GEMINI_API_KEY:-}
42
+ - GEMINI_API_BASE=https://generativelanguage.googleapis.com/v1beta/openai
 
43
  # MLX local (runs on host Metal, not in container)
44
+ - MLX_ENABLED=${MLX_ENABLED:-false}
45
  - MLX_API_BASE=http://host.docker.internal:8000/v1
46
  - LOG_LEVEL=INFO
47
  depends_on: