Spaces:
Runtime error
Runtime error
| # Cloud Run health gate for movimento-text-encoder. | |
| # Usage: | |
| # PROJECT_ID=my-project REGION=europe-west1 ./cloud-run/health_gate_text_encoder.sh | |
| # PROJECT_ID=my-project REGION=europe-west1 SERVICE_NAME=movimento-text-encoder ./cloud-run/health_gate_text_encoder.sh | |
| set -euo pipefail | |
| : "${PROJECT_ID:?Set PROJECT_ID}" | |
| REGION="${REGION:-europe-west1}" | |
| SERVICE_NAME="${SERVICE_NAME:-movimento-text-encoder}" | |
| DEMO_SERVICE_NAME="${DEMO_SERVICE_NAME:-kimodo-demo}" | |
| HF_SECRET_NAME="${HF_SECRET_NAME:-hf-token}" | |
| GATE_TIMEOUT_SEC="${GATE_TIMEOUT_SEC:-120}" | |
| GATE_RETRY_INTERVAL_SEC="${GATE_RETRY_INTERVAL_SEC:-5}" | |
| if ! command -v gcloud >/dev/null 2>&1; then | |
| echo "FAIL: gcloud CLI not found" | |
| exit 2 | |
| fi | |
| ENCODER_JSON="$(gcloud run services describe "$SERVICE_NAME" \ | |
| --region "$REGION" \ | |
| --project "$PROJECT_ID" \ | |
| --format=json)" | |
| readarray -t ENCODER_FIELDS < <(python - <<'PY' "$ENCODER_JSON" "$HF_SECRET_NAME" | |
| import json | |
| import sys | |
| service = json.loads(sys.argv[1]) | |
| expected_secret = sys.argv[2] | |
| conditions = service.get("status", {}).get("conditions", []) | |
| ready = "Unknown" | |
| for cond in conditions: | |
| if cond.get("type") == "Ready": | |
| ready = cond.get("status", "Unknown") | |
| break | |
| url = service.get("status", {}).get("url", "") | |
| latest_ready = service.get("status", {}).get("latestReadyRevisionName", "") | |
| traffic = service.get("status", {}).get("traffic", []) | |
| latest_receives_traffic = "False" | |
| for item in traffic: | |
| if item.get("latestRevision") is True and int(item.get("percent", 0)) > 0: | |
| latest_receives_traffic = "True" | |
| break | |
| spec_env = service.get("spec", {}).get("template", {}).get("spec", {}).get("containers", [{}])[0].get("env", []) | |
| secret_names = [] | |
| for env in spec_env: | |
| value_from = env.get("valueFrom") or {} | |
| key_ref = value_from.get("secretKeyRef") or {} | |
| name = key_ref.get("name") | |
| if name: | |
| secret_names.append(name) | |
| secret_wiring = "PASS" if expected_secret in secret_names and "HF_TOKEN_SECRET_NAME" not in secret_names else "FAIL" | |
| print(ready) | |
| print(url) | |
| print(latest_ready) | |
| print(latest_receives_traffic) | |
| print(secret_wiring) | |
| PY | |
| ) | |
| READY_STATUS="${ENCODER_FIELDS[0]}" | |
| ENCODER_URL="${ENCODER_FIELDS[1]}" | |
| LATEST_READY_REV="${ENCODER_FIELDS[2]}" | |
| LATEST_TRAFFIC="${ENCODER_FIELDS[3]}" | |
| SECRET_WIRING="${ENCODER_FIELDS[4]}" | |
| if [[ -z "$ENCODER_URL" ]]; then | |
| echo "Service Ready: ${READY_STATUS}" | |
| echo "Revision Traffic: ${LATEST_TRAFFIC}" | |
| echo "Encoder URL Check: FAIL (missing URL)" | |
| echo "Secret Wiring: ${SECRET_WIRING}" | |
| echo "Failure Logs: FAIL" | |
| echo "Dependency Contract: FAIL" | |
| echo "[FAIL] Encoder service URL is empty" | |
| exit 1 | |
| fi | |
| deadline=$((SECONDS + GATE_TIMEOUT_SEC)) | |
| endpoint_ok="false" | |
| latency_ms="" | |
| while (( SECONDS < deadline )); do | |
| if latency_ms=$(python - <<'PY' "$ENCODER_URL" | |
| import sys | |
| import time | |
| import urllib.request | |
| url = sys.argv[1] | |
| start = time.time() | |
| with urllib.request.urlopen(url, timeout=10) as resp: | |
| if resp.status < 500: | |
| elapsed = int((time.time() - start) * 1000) | |
| print(elapsed) | |
| else: | |
| raise RuntimeError(f"status={resp.status}") | |
| PY | |
| 2>/dev/null); then | |
| endpoint_ok="true" | |
| break | |
| fi | |
| sleep "$GATE_RETRY_INTERVAL_SEC" | |
| done | |
| demo_contract="SKIPPED" | |
| if gcloud run services describe "$DEMO_SERVICE_NAME" --region "$REGION" --project "$PROJECT_ID" >/dev/null 2>&1; then | |
| DEMO_JSON="$(gcloud run services describe "$DEMO_SERVICE_NAME" --region "$REGION" --project "$PROJECT_ID" --format=json)" | |
| demo_url_match=$(python - <<'PY' "$DEMO_JSON" "$ENCODER_URL" | |
| import json | |
| import sys | |
| service = json.loads(sys.argv[1]) | |
| encoder_url = sys.argv[2].rstrip('/') + '/' | |
| envs = service.get("spec", {}).get("template", {}).get("spec", {}).get("containers", [{}])[0].get("env", []) | |
| configured = None | |
| for env in envs: | |
| if env.get("name") == "TEXT_ENCODER_URL": | |
| configured = (env.get("value") or "").rstrip('/') + '/' | |
| break | |
| if configured == encoder_url: | |
| print("PASS") | |
| else: | |
| print("FAIL") | |
| PY | |
| ) | |
| demo_contract="$demo_url_match" | |
| fi | |
| echo "Service Ready: ${READY_STATUS}" | |
| echo "Latest Ready Revision: ${LATEST_READY_REV}" | |
| echo "Revision Traffic: ${LATEST_TRAFFIC}" | |
| if [[ "$endpoint_ok" == "true" ]]; then | |
| echo "Encoder URL Check: PASS (${latency_ms}ms)" | |
| else | |
| echo "Encoder URL Check: FAIL (timeout after ${GATE_TIMEOUT_SEC}s)" | |
| fi | |
| echo "Secret Wiring: ${SECRET_WIRING}" | |
| if [[ "$READY_STATUS" == "True" && "$LATEST_TRAFFIC" == "True" ]]; then | |
| echo "Failure Logs: PASS" | |
| else | |
| echo "Failure Logs: FAIL" | |
| fi | |
| echo "Dependency Contract: ${demo_contract}" | |
| if [[ "$READY_STATUS" != "True" || "$LATEST_TRAFFIC" != "True" || "$endpoint_ok" != "true" || "$SECRET_WIRING" != "PASS" ]]; then | |
| echo "[FAIL] Cloud Run encoder health gate failed" | |
| exit 1 | |
| fi | |
| if [[ "$demo_contract" == "FAIL" ]]; then | |
| echo "[FAIL] Demo TEXT_ENCODER_URL does not match encoder URL" | |
| exit 1 | |
| fi | |
| echo "[PASS] Cloud Run encoder health gate passed" | |