File size: 2,902 Bytes
3910229
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
#
# Deploys the ml-intern stack with kubectl: namespace and configmap, secrets
# (prompted for when the secret does not exist yet), Redis and Postgres,
# database initialisation from init.sql, then the worker and API deployments.
#
# Manifest paths (k8s/...) and init.sql are relative, so run this from the
# directory that contains them.
#
# Environment:
#   IMAGE_TAG  tag shown in the banner          (default: latest)
#   REGISTRY   registry shown in the banner     (default: ghcr.io/huggingface)
#
# NOTE(review): within this script IMAGE_TAG and REGISTRY are only printed;
# the manifests are applied as-is. Confirm the manifests reference the image
# you expect.

# -e: stop at the first failing step.
# -u: referencing an unset variable is an error instead of an empty string.
# pipefail: a pipeline fails when any stage fails, not only the last one.
set -euo pipefail

readonly NAMESPACE="ml-intern"
IMAGE_TAG="${IMAGE_TAG:-latest}"
REGISTRY="${REGISTRY:-ghcr.io/huggingface}"

echo "=== ml-intern Production Deployment ==="
echo "Namespace: $NAMESPACE"
echo "Image: $REGISTRY/ml-intern:$IMAGE_TAG"
echo ""

# Apply the namespace manifest, then the configmap manifest, one at a time
# so a failure in the first stops the script before the second is attempted.
for manifest in k8s/namespace.yml k8s/configmap.yml; do
    kubectl apply -f "$manifest"
done

#######################################
# Escape a value for use inside a double-quoted YAML scalar.
# Arguments: $1 - raw value
# Outputs:   the value with backslashes and double quotes backslash-escaped
#######################################
yaml_dq_escape() {
    local value=$1
    value=${value//\\/\\\\}
    value=${value//\"/\\\"}
    printf '%s' "$value"
}

#######################################
# Percent-encode a value so it can sit in the userinfo part of a URL.
# Arguments: $1 - raw value
# Outputs:   the value with every byte outside A-Z a-z 0-9 . _ ~ - as %XX
#######################################
urlencode() {
    local LC_ALL=C # work byte-by-byte so multibyte characters encode per byte
    local raw=$1 encoded='' ch hex i
    for ((i = 0; i < ${#raw}; i++)); do
        ch=${raw:i:1}
        case "$ch" in
            [A-Za-z0-9._~-]) encoded+=$ch ;;
            *)
                printf -v hex '%%%02X' "'$ch"
                encoded+=$hex
                ;;
        esac
    done
    printf '%s' "$encoded"
}

# Prompt for credentials and create the Secret only when it does not exist
# yet; an existing ml-intern-secrets is left untouched.
if ! kubectl -n "$NAMESPACE" get secret ml-intern-secrets >/dev/null 2>&1; then
    echo "Creating secrets..."
    # -s hides the typed value; -r keeps backslashes in it literal.
    read -rsp "HF Token: " HF_TOKEN && echo
    read -rsp "Anthropic API Key (optional): " ANTHROPIC_KEY && echo
    read -rsp "OpenAI API Key (optional): " OPENAI_KEY && echo
    read -rsp "Groq API Key (optional): " GROQ_KEY && echo
    read -rsp "NVIDIA API Key (optional): " NVIDIA_KEY && echo
    read -rsp "Postgres Password: " PG_PASSWORD && echo

    # The two prompts not marked "(optional)" are treated as required. Failing
    # here avoids storing a secret with empty values, which later runs would
    # never re-prompt for because the secret would already exist.
    if [[ -z "${HF_TOKEN:-}" || -z "${PG_PASSWORD:-}" ]]; then
        echo "HF Token and Postgres Password must not be empty." >&2
        exit 1
    fi

    # Values are escaped for YAML; the password is additionally percent-encoded
    # because characters such as '@', '/' or ':' would otherwise change how the
    # connection URL is parsed.
    kubectl apply -f - <<EOF
apiVersion: v1
kind: Secret
metadata:
  name: ml-intern-secrets
  namespace: $NAMESPACE
type: Opaque
stringData:
  HF_TOKEN: "$(yaml_dq_escape "$HF_TOKEN")"
  ANTHROPIC_API_KEY: "$(yaml_dq_escape "$ANTHROPIC_KEY")"
  OPENAI_API_KEY: "$(yaml_dq_escape "$OPENAI_KEY")"
  GROQ_API_KEY: "$(yaml_dq_escape "$GROQ_KEY")"
  NVIDIA_API_KEY: "$(yaml_dq_escape "$NVIDIA_KEY")"
  DATABASE_URL: "postgresql://ml_intern:$(urlencode "$PG_PASSWORD")@postgres:5432/ml_intern"
  REDIS_URL: "redis://redis:6379"
EOF
fi

# Apply the stateful-services manifest, then block until the redis and
# postgres StatefulSets report a finished rollout (120s budget each).
echo "Deploying stateful services..."
kubectl apply -f k8s/stateful-services.yml
for stateful_set in redis postgres; do
    kubectl -n "$NAMESPACE" rollout status "statefulset/$stateful_set" --timeout=120s
done

# Copy init.sql into the first pod labelled app=postgres and run it there
# with psql as user ml_intern against database ml_intern.
echo "Initializing database..."
kc=(kubectl -n "$NAMESPACE") # shared command prefix for this step
"${kc[@]}" wait --for=condition=ready pod -l app=postgres --timeout=60s
POD=$("${kc[@]}" get pod -l app=postgres -o jsonpath='{.items[0].metadata.name}')
"${kc[@]}" cp init.sql "$POD:/tmp/init.sql"
# NOTE(review): psql runs without -v ON_ERROR_STOP=1, so statements that fail
# inside init.sql would not fail this step. Confirm that is intended (for
# example to tolerate re-runs against an already initialised database).
"${kc[@]}" exec "$POD" -- psql -U ml_intern -d ml_intern -f /tmp/init.sql

#######################################
# Apply one Deployment manifest and wait for its rollout to finish.
# Globals:   NAMESPACE (read)
# Arguments: $1 - manifest path
#            $2 - deployment name
#            $3 - rollout timeout (e.g. 120s)
#######################################
apply_and_wait() {
    kubectl apply -f "$1"
    kubectl -n "$NAMESPACE" rollout status "deployment/$2" --timeout="$3"
}

echo "Deploying workers..."
apply_and_wait k8s/deployment-worker.yml ml-intern-worker 120s

echo "Deploying API servers..."
apply_and_wait k8s/deployment-api.yml ml-intern-api 180s

# Post-deployment summary: list pods and services, probe the API health
# endpoint, and print follow-up commands.
echo ""
echo "=== Deployment Complete ==="
echo ""
echo "Pods:"
kubectl -n "$NAMESPACE" get pods

echo ""
echo "Services:"
kubectl -n "$NAMESPACE" get services

echo ""
echo "API Health Check:"
# The health check is best-effort: the deployment has already succeeded at
# this point, so neither a failed pod lookup nor a failed probe may abort the
# script or hide the usage hints below.
API_POD=$(kubectl -n "$NAMESPACE" get pod -l app=ml-intern-api -o jsonpath='{.items[0].metadata.name}') || API_POD=""
if [[ -n "$API_POD" ]]; then
    kubectl -n "$NAMESPACE" exec "$API_POD" -- curl -s http://localhost:8000/health | python3 -m json.tool || true
else
    echo "  (skipped: could not find a pod labelled app=ml-intern-api)" >&2
fi

cat <<EOF

Port-forward for local access:
  kubectl -n $NAMESPACE port-forward svc/ml-intern-api 8080:8000
  curl http://localhost:8080/health

To scale API:
  kubectl -n $NAMESPACE scale deployment ml-intern-api --replicas=5

To view logs:
  kubectl -n $NAMESPACE logs -f deployment/ml-intern-api

To teardown:
  kubectl delete namespace $NAMESPACE
EOF