File size: 4,925 Bytes
7939f87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
#!/usr/bin/env bash
# Cloud Run health gate for movimento-text-encoder.
# Usage:
#   PROJECT_ID=my-project REGION=europe-west1 ./cloud-run/health_gate_text_encoder.sh
#   PROJECT_ID=my-project REGION=europe-west1 SERVICE_NAME=movimento-text-encoder ./cloud-run/health_gate_text_encoder.sh

# Strict mode: abort on failing commands, unset variables, and failed
# pipeline stages.
set -euo pipefail

# --- Configuration (every value can be overridden from the environment) -----
# PROJECT_ID is mandatory: `:?` aborts with the message when unset or empty.
: "${PROJECT_ID:?Set PROJECT_ID}"
# Region passed to every gcloud call.
REGION="${REGION:-europe-west1}"
# Cloud Run service being gated.
SERVICE_NAME="${SERVICE_NAME:-movimento-text-encoder}"
# Service whose TEXT_ENCODER_URL env var is compared with the encoder URL.
DEMO_SERVICE_NAME="${DEMO_SERVICE_NAME:-kimodo-demo}"
# Secret name expected among the encoder container's secretKeyRef entries.
HF_SECRET_NAME="${HF_SECRET_NAME:-hf-token}"
# Total time budget, in seconds, for the encoder URL probe loop.
GATE_TIMEOUT_SEC="${GATE_TIMEOUT_SEC:-120}"
# Pause between probe attempts; handed to `sleep` as-is.
GATE_RETRY_INTERVAL_SEC="${GATE_RETRY_INTERVAL_SEC:-5}"

# --- Preconditions (exit code 2: the gate could not run at all) -------------
if ! command -v gcloud >/dev/null 2>&1; then
  echo "FAIL: gcloud CLI not found"
  exit 2
fi

# The embedded helpers are invoked as `python` and use Python 3 only features
# (f-strings, urllib.request). Fail fast here rather than letting a missing or
# Python 2 interpreter surface later as an unrelated-looking error.
if ! python -c 'import sys; sys.exit(0 if sys.version_info[0] >= 3 else 1)' >/dev/null 2>&1; then
  echo "FAIL: python (Python 3) not found"
  exit 2
fi

# Fetch the encoder service description as JSON. The script runs under
# `set -e`, so a failing gcloud call aborts here.
ENCODER_JSON="$(gcloud run services describe "$SERVICE_NAME" \
  --region "$REGION" \
  --project "$PROJECT_ID" \
  --format=json)"

# Reduce the JSON to five newline-separated fields, in this order:
#   ready status, service URL, latest ready revision,
#   latest-revision-receives-traffic (True/False), secret wiring (PASS/FAIL).
# The helper runs in a command substitution (not a process substitution) so
# that a parser failure is visible through its exit status.
if ! ENCODER_FIELDS_RAW=$(python - <<'PY' "$ENCODER_JSON" "$HF_SECRET_NAME"
import json
import sys

service = json.loads(sys.argv[1])
expected_secret = sys.argv[2]
status = service.get("status") or {}

# Status of the condition whose type is Ready; Unknown when there is none.
ready = "Unknown"
for cond in status.get("conditions") or []:
    if cond.get("type") == "Ready":
        ready = cond.get("status", "Unknown")
        break

url = status.get("url") or ""
latest_ready = status.get("latestReadyRevisionName") or ""

# True when a traffic target flagged latestRevision has a share above zero.
# A missing or null percent counts as zero.
latest_receives_traffic = "False"
for item in status.get("traffic") or []:
    if item.get("latestRevision") is True and int(item.get("percent") or 0) > 0:
        latest_receives_traffic = "True"
        break

# Only the first container is inspected for secret-backed env vars.
template_spec = service.get("spec", {}).get("template", {}).get("spec", {})
containers = template_spec.get("containers") or [{}]
secret_names = []
for env in containers[0].get("env") or []:
    key_ref = (env.get("valueFrom") or {}).get("secretKeyRef") or {}
    name = key_ref.get("name")
    if name:
        secret_names.append(name)

# PASS needs the expected secret name and no secret literally named
# HF_TOKEN_SECRET_NAME (presumably an unsubstituted placeholder; confirm).
wired = expected_secret in secret_names and "HF_TOKEN_SECRET_NAME" not in secret_names
secret_wiring = "PASS" if wired else "FAIL"

print(ready)
print(url)
print(latest_ready)
print(latest_receives_traffic)
print(secret_wiring)
PY
); then
  echo "[FAIL] Could not parse the Cloud Run description of ${SERVICE_NAME}"
  exit 1
fi

# -t strips the newlines; empty fields (for example a missing URL) are kept as
# empty elements. The last field is never empty, so the trailing-newline
# trimming done by the command substitution cannot drop a field.
readarray -t ENCODER_FIELDS <<<"$ENCODER_FIELDS_RAW"
if (( ${#ENCODER_FIELDS[@]} != 5 )); then
  echo "[FAIL] Unexpected parser output for ${SERVICE_NAME} (${#ENCODER_FIELDS[@]} fields, expected 5)"
  exit 1
fi

READY_STATUS="${ENCODER_FIELDS[0]}"
ENCODER_URL="${ENCODER_FIELDS[1]}"
LATEST_READY_REV="${ENCODER_FIELDS[2]}"
LATEST_TRAFFIC="${ENCODER_FIELDS[3]}"
SECRET_WIRING="${ENCODER_FIELDS[4]}"

# Guard: with no service URL there is nothing to probe or compare, so print
# the report with every URL-dependent check marked FAIL and stop with status 1.
[[ -n "$ENCODER_URL" ]] || {
  printf '%s\n' \
    "Service Ready: ${READY_STATUS}" \
    "Revision Traffic: ${LATEST_TRAFFIC}" \
    "Encoder URL Check: FAIL (missing URL)" \
    "Secret Wiring: ${SECRET_WIRING}" \
    "Failure Logs: FAIL" \
    "Dependency Contract: FAIL" \
    "[FAIL] Encoder service URL is empty"
  exit 1
}

#######################################
# Send one GET request to the given URL.
# Arguments: $1 - URL to probe
# Outputs:   elapsed milliseconds on stdout when the HTTP status is below 500;
#            the failure reason on stderr otherwise
# Returns:   0 when a response with status < 500 arrived, non-zero otherwise
#            (connection error, timeout, invalid URL, or status >= 500)
#######################################
probe_encoder_url() {
  python - "$1" <<'PY'
import sys
import time
import urllib.error
import urllib.request

url = sys.argv[1]
start = time.monotonic()
try:
    with urllib.request.urlopen(url, timeout=10) as resp:
        status = resp.status
except urllib.error.HTTPError as err:
    # urlopen raises HTTPError for 4xx and 5xx responses instead of returning
    # them, so the status has to be read from the exception. A 4xx still
    # proves the service is up and answering.
    status = err.code
elapsed_ms = int((time.monotonic() - start) * 1000)

if status >= 500:
    sys.exit(f"status={status}")
print(elapsed_ms)
PY
}

# Poll the encoder URL until it answers or GATE_TIMEOUT_SEC elapses.
# Results: endpoint_ok is "true" or "false"; latency_ms holds the duration of
# the successful request and stays empty otherwise.
deadline=$((SECONDS + GATE_TIMEOUT_SEC))
endpoint_ok="false"
latency_ms=""

while (( SECONDS < deadline )); do
  # The stderr redirection is attached to the call itself. Written on a line
  # of its own after a here-document it would be a separate, always-successful
  # command and would make this condition true even when the probe failed.
  if latency_ms=$(probe_encoder_url "$ENCODER_URL" 2>/dev/null); then
    endpoint_ok="true"
    break
  fi
  sleep "$GATE_RETRY_INTERVAL_SEC"
done

# Dependency contract: when the demo service can be described, its
# TEXT_ENCODER_URL env var must equal the encoder URL (both compared with a
# single trailing slash). demo_contract ends up as:
#   SKIPPED - the demo service could not be described
#   PASS    - TEXT_ENCODER_URL matches the encoder URL
#   FAIL    - TEXT_ENCODER_URL is missing or different
demo_contract="SKIPPED"
# One describe call serves as both the existence check and the data fetch.
# Its stderr is discarded on purpose: any describe failure means "skip".
if DEMO_JSON="$(gcloud run services describe "$DEMO_SERVICE_NAME" \
  --region "$REGION" \
  --project "$PROJECT_ID" \
  --format=json 2>/dev/null)"; then
  demo_url_match=$(python - <<'PY' "$DEMO_JSON" "$ENCODER_URL"
import json
import sys

service = json.loads(sys.argv[1])
encoder_url = sys.argv[2].rstrip('/') + '/'

# Only the env of the first container is inspected.
envs = service.get("spec", {}).get("template", {}).get("spec", {}).get("containers", [{}])[0].get("env", [])
configured = None
for env in envs:
    if env.get("name") == "TEXT_ENCODER_URL":
        # A variable without a literal value normalizes to "/" and so fails.
        configured = (env.get("value") or "").rstrip('/') + '/'
        break

if configured == encoder_url:
    print("PASS")
else:
    print("FAIL")
PY
)
  demo_contract="$demo_url_match"
fi

# --- Report and verdict -----------------------------------------------------
# Work out the two derived results first, then print one line per check in a
# fixed order, then decide the exit status.
if [[ "$endpoint_ok" == "true" ]]; then
  url_check_result="PASS (${latency_ms}ms)"
else
  url_check_result="FAIL (timeout after ${GATE_TIMEOUT_SEC}s)"
fi

# "Failure Logs" is derived from readiness and traffic only; no logs are read.
failure_logs_result="FAIL"
if [[ "$READY_STATUS" == "True" && "$LATEST_TRAFFIC" == "True" ]]; then
  failure_logs_result="PASS"
fi

printf '%s\n' \
  "Service Ready: ${READY_STATUS}" \
  "Latest Ready Revision: ${LATEST_READY_REV}" \
  "Revision Traffic: ${LATEST_TRAFFIC}" \
  "Encoder URL Check: ${url_check_result}" \
  "Secret Wiring: ${SECRET_WIRING}" \
  "Failure Logs: ${failure_logs_result}" \
  "Dependency Contract: ${demo_contract}"

# The gate passes only when the service is ready, the latest revision takes
# traffic, the URL answered, and the secret is wired as expected.
if ! [[ "$READY_STATUS" == "True" && "$LATEST_TRAFFIC" == "True" && "$endpoint_ok" == "true" && "$SECRET_WIRING" == "PASS" ]]; then
  printf '%s\n' "[FAIL] Cloud Run encoder health gate failed"
  exit 1
fi

# A skipped dependency contract is tolerated; only an explicit FAIL blocks.
case "$demo_contract" in
  FAIL)
    printf '%s\n' "[FAIL] Demo TEXT_ENCODER_URL does not match encoder URL"
    exit 1
    ;;
esac

printf '%s\n' "[PASS] Cloud Run encoder health gate passed"