#!/usr/bin/env bash
#
# Production deployment of ml-intern to Kubernetes.
#
# Steps, in order:
#   1. apply the namespace and ConfigMap manifests
#   2. create the ml-intern-secrets Secret (interactive prompts) if it is absent
#   3. apply the stateful services and wait for the redis/postgres rollouts
#   4. copy init.sql into the postgres pod and run it with psql
#   5. apply the worker and API deployments and wait for their rollouts
#   6. print pods, services, a best-effort API health check and usage hints
#
# All manifest paths (k8s/*.yml) and init.sql are resolved relative to the
# current working directory.
#
# Environment:
#   IMAGE_TAG  image tag printed in the banner      (default: latest)
#   REGISTRY   image registry printed in the banner (default: ghcr.io/huggingface)
#
# NOTE(review): IMAGE_TAG and REGISTRY are only echoed; nothing in this script
# passes them to kubectl, so the image that actually runs is whatever the
# manifests reference.

set -euo pipefail

readonly NAMESPACE="ml-intern"
readonly SECRET_NAME="ml-intern-secrets"
IMAGE_TAG="${IMAGE_TAG:-latest}"
REGISTRY="${REGISTRY:-ghcr.io/huggingface}"

#######################################
# Print an error message to stderr and abort the script.
# Arguments: $* - message
#######################################
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

#######################################
# Fail fast, before the cluster is touched, if kubectl or any local input
# file used later in the script is missing.
#######################################
preflight() {
  local file
  command -v kubectl >/dev/null 2>&1 || die "required tool not found: kubectl"
  for file in \
    k8s/namespace.yml \
    k8s/configmap.yml \
    k8s/stateful-services.yml \
    k8s/deployment-worker.yml \
    k8s/deployment-api.yml \
    init.sql; do
    [[ -f "$file" ]] || die "required file not found: $file"
  done
}

#######################################
# Prompt (without echoing input) and store the answer in a caller variable.
# Arguments: $1 - name of the variable to assign
#            $2 - prompt text
#######################################
prompt_secret() {
  local __value=''
  # -r keeps backslashes in the secret intact. A failed read (EOF, closed
  # stdin) is fatal: continuing would create a Secret with empty values.
  read -rsp "$2" __value || die "could not read input for prompt: $2"
  echo
  printf -v "$1" '%s' "$__value"
}

#######################################
# Percent-encode a string so it can be embedded in the userinfo part of a URL.
# Arguments: $1 - raw string
# Outputs:   encoded string on stdout (no trailing newline)
#######################################
urlencode() {
  local LC_ALL=C # iterate byte-wise so multi-byte characters encode per byte
  local input=$1 out='' ch hex i
  for ((i = 0; i < ${#input}; i++)); do
    ch=${input:i:1}
    case "$ch" in
      [a-zA-Z0-9.~_-]) out+=$ch ;;
      *)
        printf -v hex '%%%02X' "'$ch"
        out+=$hex
        ;;
    esac
  done
  printf '%s' "$out"
}

#######################################
# Emit one "KEY: <base64>" line for the data: section of a Secret manifest.
# Arguments: $1 - key, $2 - raw value (may be empty)
#######################################
secret_entry() {
  local encoded
  # tr strips the line wrapping some base64 implementations add.
  encoded=$(printf '%s' "$2" | base64 | tr -d '\n') || return
  printf '  %s: "%s"\n' "$1" "$encoded"
}

#######################################
# Render the Secret manifest on stdout.
# Values are base64-encoded under data: so quotes, backslashes or other
# YAML-significant characters in a credential cannot corrupt the document.
# Arguments: $1 HF token, $2 Anthropic key, $3 OpenAI key, $4 Groq key,
#            $5 NVIDIA key, $6 Postgres password
#######################################
render_secret_manifest() {
  local encoded_password database_url
  # The password sits inside a URL, so reserved characters (@ : / % ...)
  # must be percent-encoded or the URL is parsed incorrectly.
  encoded_password=$(urlencode "$6") || return
  database_url="postgresql://ml_intern:${encoded_password}@postgres:5432/ml_intern"

  printf '%s\n' \
    'apiVersion: v1' \
    'kind: Secret' \
    'metadata:' \
    "  name: ${SECRET_NAME}" \
    "  namespace: ${NAMESPACE}" \
    'type: Opaque' \
    'data:'
  secret_entry HF_TOKEN "$1" || return
  secret_entry ANTHROPIC_API_KEY "$2" || return
  secret_entry OPENAI_API_KEY "$3" || return
  secret_entry GROQ_API_KEY "$4" || return
  secret_entry NVIDIA_API_KEY "$5" || return
  secret_entry DATABASE_URL "$database_url" || return
  secret_entry REDIS_URL "redis://redis:6379" || return
}

#######################################
# Prompt for credentials and create the Secret from them.
# The manifest reaches kubectl on stdin, so no credential appears in argv.
#######################################
create_secrets() {
  local hf_token anthropic_key openai_key groq_key nvidia_key pg_password
  local manifest

  command -v base64 >/dev/null 2>&1 || die "required tool not found: base64"

  echo "Creating secrets..."
  prompt_secret hf_token "HF Token: "
  prompt_secret anthropic_key "Anthropic API Key (optional): "
  prompt_secret openai_key "OpenAI API Key (optional): "
  prompt_secret groq_key "Groq API Key (optional): "
  prompt_secret nvidia_key "NVIDIA API Key (optional): "
  prompt_secret pg_password "Postgres Password: "

  # Only the prompts not labelled "(optional)" are enforced.
  [[ -n "$hf_token" ]] || die "HF Token must not be empty"
  [[ -n "$pg_password" ]] || die "Postgres Password must not be empty"

  # Render fully before piping so a rendering failure can never send a
  # truncated manifest to kubectl.
  manifest=$(render_secret_manifest \
    "$hf_token" "$anthropic_key" "$openai_key" \
    "$groq_key" "$nvidia_key" "$pg_password") \
    || die "could not render the Secret manifest"
  printf '%s\n' "$manifest" | kubectl apply -f -
}

#######################################
# Apply the stateful services and wait for redis and postgres to roll out.
#######################################
deploy_stateful_services() {
  echo "Deploying stateful services..."
  kubectl apply -f k8s/stateful-services.yml
  kubectl -n "$NAMESPACE" rollout status statefulset/redis --timeout=120s
  kubectl -n "$NAMESPACE" rollout status statefulset/postgres --timeout=120s
}

#######################################
# Copy init.sql into the first postgres pod and execute it with psql.
#######################################
init_database() {
  local pod
  echo "Initializing database..."
  kubectl -n "$NAMESPACE" wait --for=condition=ready pod -l app=postgres --timeout=60s
  pod=$(kubectl -n "$NAMESPACE" get pod -l app=postgres \
    -o jsonpath='{.items[0].metadata.name}')
  [[ -n "$pod" ]] || die "no pod found with label app=postgres"
  kubectl -n "$NAMESPACE" cp init.sql "${pod}:/tmp/init.sql"
  # NOTE(review): without -v ON_ERROR_STOP=1 psql exits 0 even when statements
  # in the file fail. Left as-is because it is not visible here whether
  # init.sql is safe to re-run on an already initialized database.
  kubectl -n "$NAMESPACE" exec "$pod" -- psql -U ml_intern -d ml_intern -f /tmp/init.sql
}

#######################################
# Apply the worker and API deployments, waiting for each rollout.
#######################################
deploy_applications() {
  echo "Deploying workers..."
  kubectl apply -f k8s/deployment-worker.yml
  kubectl -n "$NAMESPACE" rollout status deployment/ml-intern-worker --timeout=120s

  echo "Deploying API servers..."
  kubectl apply -f k8s/deployment-api.yml
  kubectl -n "$NAMESPACE" rollout status deployment/ml-intern-api --timeout=180s
}

#######################################
# Best-effort health probe: run curl inside the first API pod and
# pretty-print the JSON. Failures only warn; the deployment already succeeded.
#######################################
api_health_check() {
  local api_pod
  echo "API Health Check:"
  if api_pod=$(kubectl -n "$NAMESPACE" get pod -l app=ml-intern-api \
    -o jsonpath='{.items[0].metadata.name}') && [[ -n "$api_pod" ]]; then
    kubectl -n "$NAMESPACE" exec "$api_pod" -- curl -s http://localhost:8000/health \
      | python3 -m json.tool \
      || echo "warning: API health check failed" >&2
  else
    echo "warning: could not find an API pod for the health check" >&2
  fi
}

#######################################
# Print the post-deployment summary and follow-up command hints.
#######################################
print_summary() {
  echo ""
  echo "=== Deployment Complete ==="
  echo ""
  echo "Pods:"
  kubectl -n "$NAMESPACE" get pods

  echo ""
  echo "Services:"
  kubectl -n "$NAMESPACE" get services

  echo ""
  api_health_check

  echo ""
  echo "Port-forward for local access:"
  echo " kubectl -n $NAMESPACE port-forward svc/ml-intern-api 8080:8000"
  echo " curl http://localhost:8080/health"
  echo ""
  echo "To scale API:"
  echo " kubectl -n $NAMESPACE scale deployment ml-intern-api --replicas=5"
  echo ""
  echo "To view logs:"
  echo " kubectl -n $NAMESPACE logs -f deployment/ml-intern-api"
  echo ""
  echo "To teardown:"
  echo " kubectl delete namespace $NAMESPACE"
}

main() {
  preflight

  echo "=== ml-intern Production Deployment ==="
  echo "Namespace: $NAMESPACE"
  echo "Image: $REGISTRY/ml-intern:$IMAGE_TAG"
  echo ""

  kubectl apply -f k8s/namespace.yml
  kubectl apply -f k8s/configmap.yml

  # An existing Secret is never overwritten; prompts appear only on first run.
  if ! kubectl -n "$NAMESPACE" get secret "$SECRET_NAME" >/dev/null 2>&1; then
    create_secrets
  fi

  deploy_stateful_services
  init_database
  deploy_applications
  print_summary
}

main "$@"