raazkumar committed on
Commit
7dd4832
·
verified ·
1 Parent(s): 96db982

Upload production/docker-compose.yml

Browse files
Files changed (1) hide show
  1. production/docker-compose.yml +188 -0
production/docker-compose.yml ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Production Docker Compose stack.
#
# Services defined here:
#   api, worker          - built from Dockerfile.prod in this directory
#   redis, postgres      - backing stores, gated by healthchecks
#   nginx                - publishes 80/443 on the host
#   prometheus, grafana,
#   jaeger, pgadmin      - monitoring / tracing / DB admin tooling
#
# Variables read from the shell or an `.env` file:
#   HF_TOKEN                       - passed through to api as-is
#   ANTHROPIC_API_KEY, OPENAI_API_KEY,
#   GROQ_API_KEY, NVIDIA_API_KEY   - optional, default to empty
#   POSTGRES_PASSWORD              - defaults to "ml_intern"; override in production
#   GRAFANA_PASSWORD               - defaults to "admin"; override in production
#   PGADMIN_PASSWORD               - defaults to "admin"; override in production

# Kept for older docker-compose releases; Compose v2 treats `version` as
# obsolete and ignores it.
version: "3.8"

services:
  api:
    build:
      context: .
      dockerfile: Dockerfile.prod
    # A fixed host port ("8000:8000") cannot be combined with
    # `deploy.replicas: 2`: the second replica fails to bind the port.
    # The port is therefore only exposed on the internal network.
    # NOTE(review): this assumes nginx.conf proxies to api:8000 - confirm.
    expose:
      - "8000"
    environment:
      - PORT=8000
      - WORKERS=4
      - REDIS_URL=redis://redis:6379
      # The password must be URL-safe (or percent-encoded) because it is
      # interpolated into the connection URL.
      - DATABASE_URL=postgresql://ml_intern:${POSTGRES_PASSWORD:-ml_intern}@postgres:5432/ml_intern
      - MAX_CONCURRENT_REQUESTS=200
      - DEFAULT_RPM_LIMIT=40
      - REQUEST_TIMEOUT=120
      - CACHE_TTL_SECONDS=300
      - BUDGET_USD_PER_SESSION=10.0
      - CIRCUIT_BREAKER_FAILURE_THRESHOLD=5
      - CIRCUIT_BREAKER_RECOVERY_TIMEOUT=60
      - HF_TOKEN=${HF_TOKEN}
      - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
      - OPENAI_API_KEY=${OPENAI_API_KEY:-}
      - GROQ_API_KEY=${GROQ_API_KEY:-}
      - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
      - LOG_LEVEL=INFO
    # Start only after both backing stores report healthy.
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
    networks:
      - ml_intern_network
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '4'
          memory: 4G
    # NOTE(review): requires `curl` inside the Dockerfile.prod image - confirm.
    healthcheck:
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      start_period: 40s
    restart: unless-stopped

  # Same image build as `api`, started with the `worker` module instead.
  worker:
    build:
      context: .
      dockerfile: Dockerfile.prod
    command: ["python", "-m", "worker"]
    environment:
      - REDIS_URL=redis://redis:6379
      - DATABASE_URL=postgresql://ml_intern:${POSTGRES_PASSWORD:-ml_intern}@postgres:5432/ml_intern
      - LOG_LEVEL=INFO
    depends_on:
      redis:
        condition: service_healthy
      postgres:
        condition: service_healthy
    networks:
      - ml_intern_network
    deploy:
      replicas: 2
      resources:
        limits:
          cpus: '2'
          memory: 2G
    restart: unless-stopped

  redis:
    image: redis:7-alpine
    # Bound to loopback: this Redis has no password configured, so it must
    # not be published on every host interface. Containers on
    # ml_intern_network still reach it as redis:6379.
    ports:
      - "127.0.0.1:6379:6379"
    volumes:
      - redis_data:/data
    # NOTE(review): allkeys-lru may evict any key once 1gb is reached; if the
    # worker keeps queued jobs in Redis, confirm that eviction is acceptable.
    command: redis-server --appendonly yes --maxmemory 1gb --maxmemory-policy allkeys-lru
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 3
    restart: unless-stopped

  postgres:
    image: postgres:16-alpine
    # Bound to loopback so the database is not reachable from outside the
    # host; containers on ml_intern_network use postgres:5432 directly.
    ports:
      - "127.0.0.1:5432:5432"
    environment:
      - POSTGRES_USER=ml_intern
      # Must match the password embedded in DATABASE_URL for api and worker.
      - POSTGRES_PASSWORD=${POSTGRES_PASSWORD:-ml_intern}
      - POSTGRES_DB=ml_intern
    volumes:
      - postgres_data:/var/lib/postgresql/data
      - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
    networks:
      - ml_intern_network
    healthcheck:
      test: ["CMD-SHELL", "pg_isready -U ml_intern"]
      interval: 10s
      timeout: 3s
      retries: 5
    restart: unless-stopped

  # Public entry point: the only service publishing 80/443 on the host.
  nginx:
    image: nginx:alpine
    ports:
      - "80:80"
      - "443:443"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
      - ./ssl:/etc/nginx/ssl:ro
    depends_on:
      - api
    networks:
      - ml_intern_network
    restart: unless-stopped

  prometheus:
    # NOTE(review): `latest` is a floating tag; consider pinning a version.
    image: prom/prometheus:latest
    # Bound to loopback because --web.enable-lifecycle (below) enables
    # unauthenticated reload/quit HTTP endpoints. Grafana reaches Prometheus
    # over ml_intern_network.
    ports:
      - "127.0.0.1:9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--storage.tsdb.retention.time=30d'
      - '--web.enable-lifecycle'
    networks:
      - ml_intern_network
    restart: unless-stopped

  grafana:
    # NOTE(review): `latest` is a floating tag; consider pinning a version.
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # Falls back to "admin" when GRAFANA_PASSWORD is unset; set it in
      # production since port 3000 is published on the host.
      - GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_PASSWORD:-admin}
      - GF_INSTALL_PLUGINS=grafana-piechart-panel
    volumes:
      - grafana_data:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards:ro
      - ./grafana/datasources:/etc/grafana/provisioning/datasources:ro
    depends_on:
      - prometheus
    networks:
      - ml_intern_network
    restart: unless-stopped

  jaeger:
    # NOTE(review): `latest` is a floating tag; consider pinning a version.
    image: jaegertracing/all-in-one:latest
    ports:
      - "16686:16686"
      - "14268:14268"
    environment:
      - COLLECTOR_OTLP_ENABLED=true
    networks:
      - ml_intern_network
    restart: unless-stopped

  pgadmin:
    # NOTE(review): `latest` is a floating tag; consider pinning a version.
    image: dpage/pgadmin4:latest
    ports:
      - "5050:80"
    environment:
      - PGADMIN_DEFAULT_EMAIL=admin@mlintern.local
      # Falls back to "admin" when PGADMIN_PASSWORD is unset; set it in
      # production since port 5050 is published on the host.
      - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_PASSWORD:-admin}
    depends_on:
      - postgres
    networks:
      - ml_intern_network
    restart: unless-stopped

# Named volumes referenced by the services above.
volumes:
  redis_data:
  postgres_data:
  prometheus_data:
  grafana_data:

# Single bridge network shared by every service.
networks:
  ml_intern_network:
    driver: bridge