# Optimized for a MacBook Pro (M2 Max, 96 GB unified memory) with Gemma 4 support
# - MLX local inference runs natively on Metal (on the host, not in Docker)
# - Gemma 4 31B-BF16 (~62 GB of weights) fits comfortably in 96 GB unified memory
# - Redis, Postgres, and the API server run in lightweight containers
# - Fallback chain: NIM → Cloudflare → Gemini → MLX
# - All images below ship arm64 builds, so no container runs under emulation
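#
# To use the MLX fallback, start an OpenAI-compatible server on the host
# first (a minimal sketch, assuming mlx-lm is installed; the model name is a
# placeholder, not something this file pins):
#
#   pip install mlx-lm
#   mlx_lm.server --model <your-mlx-model> --port 8080
#
# then bring the stack up with MLX_ENABLED=true.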

services:
  api:
    build:
      context: .
      dockerfile: Dockerfile.prod
    ports:
      - "8000:8000"
    environment:
      - PORT=8000
      - WORKERS=2
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
      - MAX_CONCURRENT_REQUESTS=100
      - DEFAULT_RPM_LIMIT=40
      - REQUEST_TIMEOUT=120
      - CACHE_TTL_SECONDS=300
      - BUDGET_USD_PER_SESSION=10.0
      - CIRCUIT_BREAKER_FAILURE_THRESHOLD=3
      - CIRCUIT_BREAKER_RECOVERY_TIMEOUT=30
      # Fallback chain: NIM → Cloudflare → Gemini → MLX
      - FALLBACK_ENABLED=true
      - FALLBACK_PRIMARY=nim
      - FALLBACK_SECONDARY=cloudflare
      - FALLBACK_TERTIARY=gemini
      # NIM primary (cloud GPU)
      - NIM_API_BASE=https://integrate.api.nvidia.com/v1
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      # Cloudflare secondary fallback
      - CLOUDFLARE_API_KEY=${CLOUDFLARE_API_KEY:-}
      - CLOUDFLARE_ACCOUNT_ID=${CLOUDFLARE_ACCOUNT_ID:-}
      # Google Gemini tertiary fallback
      - GEMINI_API_KEY=${GEMINI_API_KEY:-}
      - GEMINI_API_BASE=https://generativelanguage.googleapis.com/v1beta/openai
      # MLX local (runs on host Metal, not in a container; off by default).
      # Host port 8000 is already taken by this service's published port, so
      # the host MLX server must listen elsewhere (8080 is mlx_lm.server's
      # default).
      - MLX_ENABLED=${MLX_ENABLED:-false}
      - MLX_API_BASE=http://host.docker.internal:8080/v1
      - LOG_LEVEL=INFO
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped

  worker:
    build:
      context: .
      dockerfile: Dockerfile.prod
    command: ["python", "-m", "production.worker"]
    environment:
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
      - LOG_LEVEL=INFO
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
    networks:
      - ml_intern_network
    restart: unless-stopped

  redis:
    image: redis:7-alpine
    ports:
      - "127.0.0.1:6379:6379"
    volumes:
      - redis_data:/data
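    # allkeys-lru evicts the least-recently-used keys once the 512 MB cap is
    # reached — a safe policy for a pure response cache, where any evicted
    # entry can simply be recomputed.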
    command: redis-server --appendonly yes --maxmemory 512mb --maxmemory-policy allkeys-lru
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3
    restart: unless-stopped

  postgres:
    image: postgres:16-alpine
    ports:
      - "127.0.0.1:5432:5432"
    environment:
      - POSTGRES_USER=ml_intern
      - POSTGRES_PASSWORD=ml_intern
      - POSTGRES_DB=ml_intern
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ml_intern"]
      interval: 10s
      timeout: 3s
      retries: 5
    restart: unless-stopped

  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    depends_on:
      - api
    networks:
      - ml_intern_network
    restart: unless-stopped

volumes:
  redis_data:
  postgres_data:

networks:
  ml_intern_network:
    driver: bridge
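
# Usage (a sketch, assuming Docker Desktop on macOS and a .env file providing
# NVIDIA_API_KEY, CLOUDFLARE_API_KEY, CLOUDFLARE_ACCOUNT_ID, GEMINI_API_KEY):
#
#   docker compose up -d --build
#   curl -f http://localhost:8000/health   # same path the api healthcheck probes
#   docker compose logs -f api             # watch fallback decisions in the logs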