raazkumar committed on
Commit
3910229
·
verified ·
1 Parent(s): a32fac2

Upload production/k8s/deploy.sh

Browse files
Files changed (1) hide show
  1. production/k8s/deploy.sh +87 -0
production/k8s/deploy.sh ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# Deploys the ml-intern stack to Kubernetes, in order:
#   1. namespace and config map
#   2. the ml-intern-secrets Secret (prompted for interactively if absent)
#   3. redis and postgres stateful services
#   4. database initialisation from init.sql
#   5. worker deployment, then API deployment
# and finally prints pods, services, a best-effort API health check and a few
# follow-up commands.
#
# Usage: run from the directory that contains both k8s/ and init.sql; all
# manifest paths below are relative to the current working directory.
#
# Environment:
#   IMAGE_TAG  image tag shown in the banner (default: latest)
#   REGISTRY   image registry shown in the banner (default: ghcr.io/huggingface)
#
# NOTE(review): IMAGE_TAG and REGISTRY are only echoed by this script; nothing
# here passes them to kubectl, so the image actually deployed is whatever the
# manifests reference. Confirm that is intended.
set -euo pipefail

readonly NAMESPACE="ml-intern"
IMAGE_TAG="${IMAGE_TAG:-latest}"
REGISTRY="${REGISTRY:-ghcr.io/huggingface}"

# Print an error message to stderr and abort the deployment.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Escape a value so it can be embedded in a double-quoted YAML scalar.
# Arguments: $1 - raw value
# Outputs:   escaped value on stdout
yaml_dq_escape() {
  local value=$1
  value=${value//\\/\\\\}
  value=${value//\"/\\\"}
  printf '%s' "$value"
}

# Fail before touching the cluster if kubectl or any input file is missing,
# so a wrong working directory cannot leave a half-applied deployment.
preflight() {
  local path
  command -v kubectl >/dev/null || die "kubectl not found in PATH"
  for path in \
    k8s/namespace.yml \
    k8s/configmap.yml \
    k8s/stateful-services.yml \
    k8s/deployment-worker.yml \
    k8s/deployment-api.yml \
    init.sql; do
    [[ -f "$path" ]] \
      || die "missing $path (run this script from the directory containing k8s/ and init.sql)"
  done
}

# Prompt for credentials and create the ml-intern-secrets Secret.
# Values are held in function-local variables only and are sent to kubectl on
# stdin, so they never appear in a process argument list.
create_secrets() {
  local hf_token='' anthropic_key='' openai_key=''
  local groq_key='' nvidia_key='' pg_password=''

  echo "Creating secrets..."
  # -r keeps backslashes in the entered values intact.
  read -rsp "HF Token: " hf_token && echo
  read -rsp "Anthropic API Key (optional): " anthropic_key && echo
  read -rsp "OpenAI API Key (optional): " openai_key && echo
  read -rsp "Groq API Key (optional): " groq_key && echo
  read -rsp "NVIDIA API Key (optional): " nvidia_key && echo
  read -rsp "Postgres Password: " pg_password && echo

  # The two prompts not marked "(optional)" must not be left empty.
  [[ -n "$hf_token" ]] || die "HF Token must not be empty"
  [[ -n "$pg_password" ]] || die "Postgres Password must not be empty"

  hf_token=$(yaml_dq_escape "$hf_token")
  anthropic_key=$(yaml_dq_escape "$anthropic_key")
  openai_key=$(yaml_dq_escape "$openai_key")
  groq_key=$(yaml_dq_escape "$groq_key")
  nvidia_key=$(yaml_dq_escape "$nvidia_key")
  # NOTE(review): the password is placed in the connection URL as typed; if it
  # contains URL-reserved characters (e.g. @ / : # %) it presumably needs to
  # be percent-encoded by the operator - confirm against the consuming client.
  pg_password=$(yaml_dq_escape "$pg_password")

  kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: ml-intern-secrets
  namespace: $NAMESPACE
type: Opaque
stringData:
  HF_TOKEN: "$hf_token"
  ANTHROPIC_API_KEY: "$anthropic_key"
  OPENAI_API_KEY: "$openai_key"
  GROQ_API_KEY: "$groq_key"
  NVIDIA_API_KEY: "$nvidia_key"
  DATABASE_URL: "postgresql://ml_intern:$pg_password@postgres:5432/ml_intern"
  REDIS_URL: "redis://redis:6379"
EOF
}

# Copy init.sql into the first postgres pod and run it with psql.
init_database() {
  local pg_pod

  echo "Initializing database..."
  kubectl -n "$NAMESPACE" wait --for=condition=ready pod -l app=postgres --timeout=60s
  pg_pod=$(kubectl -n "$NAMESPACE" get pod -l app=postgres \
    -o jsonpath='{.items[0].metadata.name}')
  [[ -n "$pg_pod" ]] || die "no postgres pod found in namespace $NAMESPACE"

  kubectl -n "$NAMESPACE" cp init.sql "$pg_pod:/tmp/init.sql"
  # NOTE(review): psql is run without ON_ERROR_STOP, so SQL errors inside
  # init.sql do not fail the deployment. Left as-is because init.sql may not
  # be idempotent across re-runs - confirm before tightening.
  kubectl -n "$NAMESPACE" exec "$pg_pod" -- psql -U ml_intern -d ml_intern -f /tmp/init.sql
}

# Best-effort health probe: never fails the deployment.
api_health_check() {
  local api_pod

  echo "API Health Check:"
  api_pod=$(kubectl -n "$NAMESPACE" get pod -l app=ml-intern-api \
    -o jsonpath='{.items[0].metadata.name}') || api_pod=''
  if [[ -z "$api_pod" ]]; then
    echo "warning: no API pod found; skipping health check" >&2
    return 0
  fi
  kubectl -n "$NAMESPACE" exec "$api_pod" -- curl -s http://localhost:8000/health \
    | python3 -m json.tool || true
}

main() {
  echo "=== ml-intern Production Deployment ==="
  echo "Namespace: $NAMESPACE"
  echo "Image: $REGISTRY/ml-intern:$IMAGE_TAG"
  echo ""

  preflight

  kubectl apply -f k8s/namespace.yml
  kubectl apply -f k8s/configmap.yml

  # Only prompt for credentials when the Secret does not exist yet.
  if ! kubectl -n "$NAMESPACE" get secret ml-intern-secrets >/dev/null 2>&1; then
    create_secrets
  fi

  echo "Deploying stateful services..."
  kubectl apply -f k8s/stateful-services.yml
  kubectl -n "$NAMESPACE" rollout status statefulset/redis --timeout=120s
  kubectl -n "$NAMESPACE" rollout status statefulset/postgres --timeout=120s

  init_database

  echo "Deploying workers..."
  kubectl apply -f k8s/deployment-worker.yml
  kubectl -n "$NAMESPACE" rollout status deployment/ml-intern-worker --timeout=120s

  echo "Deploying API servers..."
  kubectl apply -f k8s/deployment-api.yml
  kubectl -n "$NAMESPACE" rollout status deployment/ml-intern-api --timeout=180s

  echo ""
  echo "=== Deployment Complete ==="
  echo ""
  echo "Pods:"
  kubectl -n "$NAMESPACE" get pods

  echo ""
  echo "Services:"
  kubectl -n "$NAMESPACE" get services

  echo ""
  api_health_check

  echo ""
  echo "Port-forward for local access:"
  echo " kubectl -n $NAMESPACE port-forward svc/ml-intern-api 8080:8000"
  echo " curl http://localhost:8080/health"
  echo ""
  echo "To scale API:"
  echo " kubectl -n $NAMESPACE scale deployment ml-intern-api --replicas=5"
  echo ""
  echo "To view logs:"
  echo " kubectl -n $NAMESPACE logs -f deployment/ml-intern-api"
  echo ""
  echo "To teardown:"
  echo " kubectl delete namespace $NAMESPACE"
}

main "$@"