#!/bin/bash
#
# Deploys the ml-intern stack to Kubernetes with kubectl, in this order:
# namespace and ConfigMap, secrets, stateful services (Redis, Postgres),
# database initialisation, workers, API servers.
#
# Usage: run from the directory that contains k8s/ and init.sql -- every
# manifest path used by this script is relative to the current directory.
#
# Environment:
#   IMAGE_TAG  image tag shown in the banner  (default: latest)
#   REGISTRY   registry shown in the banner   (default: ghcr.io/huggingface)
#
# NOTE(review): IMAGE_TAG and REGISTRY are only echoed below; nothing in this
# script passes them to kubectl, so the manifests presumably pin their own
# image reference -- confirm before relying on these variables.

# Abort on any failed command, on use of an unset variable, and when any
# stage of a pipeline fails (not just the last one).
set -euo pipefail

NAMESPACE="ml-intern"
IMAGE_TAG="${IMAGE_TAG:-latest}"
REGISTRY="${REGISTRY:-ghcr.io/huggingface}"

echo "=== ml-intern Production Deployment ==="
echo "Namespace: $NAMESPACE"
echo "Image: $REGISTRY/ml-intern:$IMAGE_TAG"
echo ""

# namespace.yml goes first; presumably it creates $NAMESPACE, which every
# later `kubectl -n` call targets.
kubectl apply -f k8s/namespace.yml
kubectl apply -f k8s/configmap.yml
# Percent-encode a string for use inside a URL component.
# Every byte outside RFC 3986's "unreserved" set (A-Z a-z 0-9 - . _ ~) is
# emitted as %XX. LC_ALL=C makes the loop walk bytes rather than multi-byte
# characters, so non-ASCII input is encoded byte by byte.
#   $1 - raw string
# Outputs the encoded string on stdout (no trailing newline).
urlencode() {
  local LC_ALL=C
  local raw=$1 encoded='' ch hex i
  for ((i = 0; i < ${#raw}; i++)); do
    ch=${raw:i:1}
    case "$ch" in
      [A-Za-z0-9._~-]) encoded+=$ch ;;
      *)
        # "'$ch" makes printf use the character's numeric byte value.
        printf -v hex '%%%02X' "'$ch"
        encoded+=$hex
        ;;
    esac
  done
  printf '%s' "$encoded"
}

# Base64-encode a string on a single line (some base64 implementations wrap
# long output, so newlines are stripped explicitly).
#   $1 - raw string
b64() {
  printf '%s' "$1" | base64 | tr -d '\n'
}

# Create the application Secret on first deployment only; an existing Secret
# is left untouched.
if ! kubectl -n "$NAMESPACE" get secret ml-intern-secrets >/dev/null 2>&1; then
  echo "Creating secrets..."
  # -r keeps backslashes literal; -s hides the typed value; the trailing
  # echo supplies the newline that silent mode swallows.
  read -rsp "HF Token: " HF_TOKEN && echo
  read -rsp "Anthropic API Key (optional): " ANTHROPIC_KEY && echo
  read -rsp "OpenAI API Key (optional): " OPENAI_KEY && echo
  read -rsp "Groq API Key (optional): " GROQ_KEY && echo
  read -rsp "NVIDIA API Key (optional): " NVIDIA_KEY && echo
  read -rsp "Postgres Password: " PG_PASSWORD && echo

  # Refuse to persist empty required values: once the Secret exists this
  # block is skipped on every later run, so a bad Secret would stick until
  # someone deletes it by hand. This also catches non-interactive runs where
  # read hits EOF and leaves the variables empty.
  if [[ -z "$HF_TOKEN" || -z "$PG_PASSWORD" ]]; then
    echo "Error: HF Token and Postgres Password are required." >&2
    exit 1
  fi

  # Characters such as @ : / # ? % in the password would otherwise change how
  # the connection URL is parsed.
  database_url="postgresql://ml_intern:$(urlencode "$PG_PASSWORD")@postgres:5432/ml_intern"

  # Values go into `data` as base64 rather than being pasted into quoted
  # `stringData` scalars, so quotes, backslashes or other YAML-significant
  # characters in a secret cannot break or alter the manifest. The manifest
  # is fed on stdin so no secret appears in a process argument list.
  # The heredoc body and its terminator stay at column 0 on purpose: the
  # indentation inside it is YAML structure, not shell formatting.
  cat <<EOF | kubectl apply -f -
apiVersion: v1
kind: Secret
metadata:
  name: ml-intern-secrets
  namespace: $NAMESPACE
type: Opaque
data:
  HF_TOKEN: "$(b64 "$HF_TOKEN")"
  ANTHROPIC_API_KEY: "$(b64 "$ANTHROPIC_KEY")"
  OPENAI_API_KEY: "$(b64 "$OPENAI_KEY")"
  GROQ_API_KEY: "$(b64 "$GROQ_KEY")"
  NVIDIA_API_KEY: "$(b64 "$NVIDIA_KEY")"
  DATABASE_URL: "$(b64 "$database_url")"
  REDIS_URL: "$(b64 "redis://redis:6379")"
EOF
fi
# Bring up Redis and Postgres, then load init.sql into the Postgres pod.

# Check for the SQL file before touching the cluster: otherwise a missing
# file is only discovered after the stateful rollout and its waits.
if [[ ! -f init.sql ]]; then
  echo "Error: init.sql not found in $(pwd); run this script from the directory that contains it." >&2
  exit 1
fi

echo "Deploying stateful services..."
kubectl apply -f k8s/stateful-services.yml
kubectl -n "$NAMESPACE" rollout status statefulset/redis --timeout=120s
kubectl -n "$NAMESPACE" rollout status statefulset/postgres --timeout=120s

echo "Initializing database..."
kubectl -n "$NAMESPACE" wait --for=condition=ready pod -l app=postgres --timeout=60s
# Use the first pod returned for the app=postgres label.
POD=$(kubectl -n "$NAMESPACE" get pod -l app=postgres -o jsonpath='{.items[0].metadata.name}')
kubectl -n "$NAMESPACE" cp init.sql "$POD:/tmp/init.sql"
# NOTE(review): psql continues past failed statements and exits 0 unless
# ON_ERROR_STOP is set, so SQL errors here do not stop the deployment.
# Left unchanged because it is not visible from this script whether init.sql
# is safe to re-run -- confirm before adding `-v ON_ERROR_STOP=1`.
kubectl -n "$NAMESPACE" exec "$POD" -- psql -U ml_intern -d ml_intern -f /tmp/init.sql
# Apply one Deployment manifest and block until its rollout completes.
#   $1 - progress message to print
#   $2 - manifest path
#   $3 - deployment name (without the "deployment/" prefix)
#   $4 - rollout timeout, e.g. 120s
apply_and_await() {
  echo "$1"
  kubectl apply -f "$2"
  kubectl -n "$NAMESPACE" rollout status "deployment/$3" --timeout="$4"
}

# Workers are rolled out first, then the API servers.
apply_and_await "Deploying workers..." k8s/deployment-worker.yml ml-intern-worker 120s
apply_and_await "Deploying API servers..." k8s/deployment-api.yml ml-intern-api 180s
# Post-deployment report: list pods and services, run a best-effort health
# check against one API pod, then print follow-up commands.
echo ""
echo "=== Deployment Complete ==="
echo ""
echo "Pods:"
kubectl -n "$NAMESPACE" get pods
echo ""
echo "Services:"
kubectl -n "$NAMESPACE" get services
echo ""
echo "API Health Check:"
# The health check is informational only. Running the pod lookup inside the
# `if` keeps a failed lookup (e.g. no API pod listed) from aborting the
# script under `set -e` before the usage hints below are printed.
if API_POD=$(kubectl -n "$NAMESPACE" get pod -l app=ml-intern-api -o jsonpath='{.items[0].metadata.name}') \
  && [[ -n "$API_POD" ]]; then
  # curl runs inside the container; json.tool pretty-prints locally.
  # `|| true`: a failing check must not fail an otherwise finished deploy.
  kubectl -n "$NAMESPACE" exec "$API_POD" -- curl -s http://localhost:8000/health | python3 -m json.tool || true
else
  echo "(skipped: could not find an ml-intern-api pod)" >&2
fi

# Follow-up commands for the operator; $NAMESPACE is expanded in the text.
cat <<EOF

Port-forward for local access:
 kubectl -n $NAMESPACE port-forward svc/ml-intern-api 8080:8000
 curl http://localhost:8080/health

To scale API:
 kubectl -n $NAMESPACE scale deployment ml-intern-api --replicas=5

To view logs:
 kubectl -n $NAMESPACE logs -f deployment/ml-intern-api

To teardown:
 kubectl delete namespace $NAMESPACE
EOF