# ml-intern-local-fork / production/docker-compose.yml
# raazkumar: "Upload production/docker-compose.yml" (commit 7dd4832, verified)
# Compose file format declaration.
# NOTE(review): Docker Compose V2 reportedly treats the top-level `version`
# key as obsolete and ignores it — confirm the target tooling before removing.
version: "3.8"
# All containers of the stack are declared under this key.
services:
# HTTP API service, built from Dockerfile.prod in this directory.
api:
  build:
    context: .
    dockerfile: Dockerfile.prod
  # Container port only, no fixed host mapping: a host port such as
  # "8000:8000" can be bound by exactly one container, so it conflicts with
  # `deploy.replicas: 2` below (the second replica fails to bind port 8000).
  # Other services on ml_intern_network still reach the API at api:8000.
  # NOTE(review): assumes external traffic enters through the nginx service
  # and is proxied to api:8000 — confirm against nginx.conf.
  expose:
    - "8000"
  environment:
    - PORT=8000
    - WORKERS=4
    # Backing services are addressed by their Compose service names.
    - REDIS_URL=redis://redis:6379
    # NOTE(review): credentials are hard-coded and must stay in sync with the
    # POSTGRES_* values of the postgres service.
    - DATABASE_URL=postgresql://ml_intern:ml_intern@postgres:5432/ml_intern
    - MAX_CONCURRENT_REQUESTS=200
    - DEFAULT_RPM_LIMIT=40
    - REQUEST_TIMEOUT=120
    - CACHE_TTL_SECONDS=300
    - BUDGET_USD_PER_SESSION=10.0
    - CIRCUIT_BREAKER_FAILURE_THRESHOLD=5
    - CIRCUIT_BREAKER_RECOVERY_TIMEOUT=60
    # HF_TOKEN has no fallback; the provider keys below default to empty.
    - HF_TOKEN=${HF_TOKEN}
    - ANTHROPIC_API_KEY=${ANTHROPIC_API_KEY:-}
    - OPENAI_API_KEY=${OPENAI_API_KEY:-}
    - GROQ_API_KEY=${GROQ_API_KEY:-}
    - NVIDIA_API_KEY=${NVIDIA_API_KEY:-}
    - LOG_LEVEL=INFO
  # Start only after both backing stores report healthy.
  depends_on:
    redis:
      condition: service_healthy
    postgres:
      condition: service_healthy
  networks:
    - ml_intern_network
  deploy:
    replicas: 2
    resources:
      limits:
        cpus: '4'
        memory: 4G
  # NOTE(review): this probe needs `curl` inside the image built from
  # Dockerfile.prod — confirm it is installed there.
  healthcheck:
    test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
    interval: 30s
    timeout: 10s
    retries: 3
    start_period: 40s
  restart: unless-stopped
# Background worker: same image as the API, started with `python -m worker`.
worker:
  build:
    dockerfile: Dockerfile.prod
    context: .
  command:
    - python
    - "-m"
    - worker
  restart: unless-stopped
  networks: [ml_intern_network]
  # Wait for both backing stores to pass their health checks.
  depends_on:
    postgres:
      condition: service_healthy
    redis:
      condition: service_healthy
  environment:
    REDIS_URL: "redis://redis:6379"
    DATABASE_URL: "postgresql://ml_intern:ml_intern@postgres:5432/ml_intern"
    LOG_LEVEL: "INFO"
  deploy:
    replicas: 2
    resources:
      limits:
        memory: 2G
        cpus: "2"
# Redis instance referenced by REDIS_URL in the api and worker services.
redis:
  image: redis:7-alpine
  # Published on the loopback interface only: the server is started without
  # a password, so binding to all host interfaces would expose it to the
  # network. Services on ml_intern_network use redis:6379 and are unaffected.
  ports:
    - "127.0.0.1:6379:6379"
  volumes:
    - redis_data:/data
  # AOF persistence on, memory capped at 1 GB with LRU eviction over all keys.
  # NOTE(review): `allkeys-lru` can evict any key once the cap is reached —
  # confirm nothing stored here (e.g. queued work) must survive eviction.
  command: >-
    redis-server
    --appendonly yes
    --maxmemory 1gb
    --maxmemory-policy allkeys-lru
  networks:
    - ml_intern_network
  healthcheck:
    test: ["CMD", "redis-cli", "ping"]
    interval: 10s
    timeout: 3s
    retries: 3
  restart: unless-stopped
# PostgreSQL database referenced by DATABASE_URL in the api and worker services.
postgres:
  image: postgres:16-alpine
  # Published on the loopback interface only: the credentials below are
  # trivial, so the database should not be reachable from other hosts.
  # Services on ml_intern_network use postgres:5432 and are unaffected.
  ports:
    - "127.0.0.1:5432:5432"
  environment:
    # NOTE(review): hard-coded credentials; they must match the DATABASE_URL
    # values in the api and worker services if they are ever changed.
    - POSTGRES_USER=ml_intern
    - POSTGRES_PASSWORD=ml_intern
    - POSTGRES_DB=ml_intern
  volumes:
    - postgres_data:/var/lib/postgresql/data
    # Mounted read-only into the image's init directory.
    - ./init.sql:/docker-entrypoint-initdb.d/init.sql:ro
  networks:
    - ml_intern_network
  healthcheck:
    # `-d` pins the probe to the application database instead of relying on
    # the default database name happening to equal the user name.
    test: ["CMD-SHELL", "pg_isready -U ml_intern -d ml_intern"]
    interval: 10s
    timeout: 3s
    retries: 5
  restart: unless-stopped
# Nginx front end listening on the standard HTTP and HTTPS ports.
nginx:
  image: nginx:alpine
  restart: unless-stopped
  # Started after the api container (start order only, no health gating).
  depends_on: [api]
  networks: [ml_intern_network]
  ports:
    - '80:80'
    - '443:443'
  # Configuration and certificate directory are mounted read-only.
  volumes:
    - './nginx.conf:/etc/nginx/nginx.conf:ro'
    - './ssl:/etc/nginx/ssl:ro'
# Prometheus metrics server with 30 days of TSDB retention.
prometheus:
  # NOTE(review): `latest` is not reproducible — consider pinning a version.
  image: prom/prometheus:latest
  # Published on the loopback interface only: `--web.enable-lifecycle` below
  # turns on the unauthenticated HTTP reload/quit endpoints, which must not
  # be reachable from other hosts. Services on ml_intern_network still use
  # prometheus:9090.
  ports:
    - "127.0.0.1:9090:9090"
  volumes:
    - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
    - prometheus_data:/prometheus
  command:
    - '--config.file=/etc/prometheus/prometheus.yml'
    - '--storage.tsdb.path=/prometheus'
    - '--storage.tsdb.retention.time=30d'
    - '--web.enable-lifecycle'
  networks:
    - ml_intern_network
  restart: unless-stopped
# Grafana UI; dashboards and datasources are provisioned from ./grafana.
grafana:
  image: grafana/grafana:latest
  restart: unless-stopped
  depends_on: [prometheus]
  networks: [ml_intern_network]
  ports:
    - '3000:3000'
  environment:
    # Falls back to "admin" when GRAFANA_PASSWORD is not set.
    GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_PASSWORD:-admin}"
    GF_INSTALL_PLUGINS: "grafana-piechart-panel"
  volumes:
    - 'grafana_data:/var/lib/grafana'
    - './grafana/dashboards:/etc/grafana/provisioning/dashboards:ro'
    - './grafana/datasources:/etc/grafana/provisioning/datasources:ro'
# Jaeger all-in-one tracing backend with the OTLP collector switched on.
jaeger:
  image: jaegertracing/all-in-one:latest
  restart: unless-stopped
  networks: [ml_intern_network]
  environment:
    COLLECTOR_OTLP_ENABLED: "true"
  # Two host ports are published: 16686 and 14268.
  ports:
    - '16686:16686'
    - '14268:14268'
# pgAdmin web UI, served on host port 5050.
pgadmin:
  # NOTE(review): `latest` is not reproducible — consider pinning a version.
  image: dpage/pgadmin4:latest
  ports:
    - "5050:80"
  environment:
    - PGADMIN_DEFAULT_EMAIL=admin@mlintern.local
    # Falls back to "admin" when PGADMIN_PASSWORD is not set.
    - PGADMIN_DEFAULT_PASSWORD=${PGADMIN_PASSWORD:-admin}
    # Recent pgAdmin images validate the default e-mail and reject
    # special-use domains such as `.local`, which aborts container startup.
    # Allow-listing the domain keeps the existing login address working.
    # NOTE(review): override name taken from pgAdmin's config.py
    # (ALLOW_SPECIAL_EMAIL_DOMAINS) — confirm against the image version in use.
    - "PGADMIN_CONFIG_ALLOW_SPECIAL_EMAIL_DOMAINS=['local']"
  depends_on:
    - postgres
  networks:
    - ml_intern_network
  restart: unless-stopped
# Named volumes with default settings, one per stateful service.
volumes:
  redis_data: {}
  postgres_data: {}
  prometheus_data: {}
  grafana_data: {}
# Single bridge network that every service in this file attaches to.
networks:
  ml_intern_network:
    driver: "bridge"