version: "3.8"
services:
  # HTTP API, built from Dockerfile.prod and listening on container port 8000.
  api:
    build:
      context: .
      dockerfile: Dockerfile.prod
    ports:
      # A host port *range* is published because deploy.replicas is 2: two
      # containers cannot both bind the single host port 8000.
      - "8000-8001:8000"
    environment:
      - PORT=8000
      - WORKERS=4
      - REDIS_URL=redis://redis:6379
      # Credentials must match POSTGRES_USER / POSTGRES_PASSWORD / POSTGRES_DB
      # on the postgres service.
      - DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
      - MAX_CONCURRENT_REQUESTS=200
      - DEFAULT_RPM_LIMIT=40
      - REQUEST_TIMEOUT=120
      - CACHE_TTL_SECONDS=300
      - BUDGET_USD_PER_SESSION=10.0
      - CIRCUIT_BREAKER_FAILURE_THRESHOLD=5
      - CIRCUIT_BREAKER_RECOVERY_TIMEOUT=60
      # HF_TOKEN has no fallback; the provider keys below fall back to an
      # empty string when unset in the host environment.
      - HF_TOKEN=${HF_TOKEN}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - GROQ_API_KEY=${GROQ_API_KEY:-}
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      - LOG_LEVEL=INFO
    # Start only after both backing stores pass their healthchecks.
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
    networks:
      - ml_intern_network
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '4'
          memory: 4G
    healthcheck:
      # NOTE(review): assumes curl exists in the image built from
      # Dockerfile.prod and that the app serves /health - confirm.
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped

  # Background worker: same image as api, started with `python -m worker`.
  worker:
    build:
      context: .
      dockerfile: Dockerfile.prod
    command: ["python", "-m", "worker"]
    environment:
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
      - LOG_LEVEL=INFO
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
    networks:
      - ml_intern_network
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '2'
          memory: 2G
    restart: unless-stopped

  # Redis with append-only persistence, a 1gb memory cap and allkeys-lru
  # eviction (see the command flags).
  redis:
    image: redis:7-alpine
    ports:
      # Loopback only: this instance has no password configured, so it must
      # not be reachable on external host interfaces. Containers reach it as
      # redis:6379 over ml_intern_network regardless of this mapping.
      - "127.0.0.1:6379:6379"
    volumes:
      - redis_data:/data
    command: redis-server --appendonly yes --maxmemory 1gb --maxmemory-policy allkeys-lru
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3
    restart: unless-stopped

  # PostgreSQL; ./init.sql is mounted read-only into the image's
  # docker-entrypoint-initdb.d directory.
  postgres:
    image: postgres:16-alpine
    ports:
      # Loopback only: the password is hard-coded in this file, so the port
      # is kept off external host interfaces. Containers use postgres:5432.
      - "127.0.0.1:5432:5432"
    environment:
      - POSTGRES_USER=ml_intern
      # NOTE(review): hard-coded credential; consider sourcing from an env
      # var or secret (DATABASE_URL in api/worker must change with it).
      - POSTGRES_PASSWORD=ml_intern
      - POSTGRES_DB=ml_intern
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ml_intern"]
      interval: 10s
      timeout: 3s
      retries: 5
    restart: unless-stopped

  # Nginx publishing 80/443; config and certificates come from read-only
  # bind mounts. Presumably proxies to api - verify against nginx.conf.
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/nginx/ssl:ro
    depends_on:
      - api
    networks:
      - ml_intern_network
    restart: unless-stopped

  # Prometheus with 30 days of TSDB retention stored in prometheus_data.
  prometheus:
    # NOTE(review): `latest` is not reproducible; consider pinning a version.
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      # NOTE(review): enables the HTTP reload/quit endpoints while port 9090
      # is published on all interfaces - confirm this exposure is intended.
      - '--web.enable-lifecycle'
    networks:
      - ml_intern_network
    restart: unless-stopped

  # Grafana; dashboards and datasources are provisioned from ./grafana.
  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # Falls back to "admin" when GRAFANA_PASSWORD is unset.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin}
      - GF_INSTALL_PLUGINS=grafana-piechart-panel
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
      - ./grafana/datasources:/etc/grafana/provisioning/datasources:ro
    depends_on:
      - prometheus
    networks:
      - ml_intern_network
    restart: unless-stopped

  # Jaeger all-in-one with the OTLP collector enabled.
  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "16686:16686"
      - "14268:14268"
    environment:
      - COLLECTOR_OTLP_ENABLED=true
    networks:
      - ml_intern_network
    restart: unless-stopped

  # pgAdmin, served on host port 5050 (container port 80).
  pgadmin:
    image: dpage/pgadmin4:latest
    ports:
      - "5050:80"
    environment:
      - PGADMIN_DEFAULT_EMAIL=admin@mlintern.local
      # Falls back to "admin" when PGADMIN_PASSWORD is unset.
      - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_PASSWORD:-admin}
    depends_on:
      - postgres
    networks:
      - ml_intern_network
    restart: unless-stopped

# Named volumes referenced by the redis, postgres, prometheus and grafana
# services; each uses default settings (explicit empty mapping).
volumes:
  redis_data: {}
  postgres_data: {}
  prometheus_data: {}
  grafana_data: {}

# Single bridge network that every service in this file attaches to.
networks:
  ml_intern_network:
    driver: bridge