# File: movimento/cloud-run/text-encoder.yaml
# Last change by rydlrKE: "Cloud Run encoder wiring + startup resilience"
# Commit 7939f87 (verified)
# Cloud Run (Knative Serving v1) manifest for the movimento text-encoder
# service: a single GPU-backed instance running the kimodo text-encoder server.
#
# NOTE(review): the upper-case tokens (REGION, PROJECT_ID,
# GPU_TYPE_PLACEHOLDER, GPU_COUNT_PLACEHOLDER, HF_TOKEN_SECRET_NAME) look like
# placeholders substituted by a deploy step before this file is applied —
# confirm against the deploy tooling.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: movimento-text-encoder
  annotations:
    run.googleapis.com/launch-stage: GA
spec:
  template:
    metadata:
      annotations:
        # min == max == 1 pins the service to exactly one instance.
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        run.googleapis.com/execution-environment: gen2
        # The documented Cloud Run GPU manifest disables CPU throttling
        # (instance-based billing) for GPU services.
        run.googleapis.com/cpu-throttling: "false"
        run.googleapis.com/gpu-zonal-redundancy-disabled: "true"
    spec:
      # One request at a time per instance.
      containerConcurrency: 1
      # Per-request timeout, in seconds.
      timeoutSeconds: 900
      # The GPU type is selected with a nodeSelector on the revision spec,
      # as in the Cloud Run GPU configuration docs, rather than through a
      # template annotation.
      nodeSelector:
        run.googleapis.com/accelerator: GPU_TYPE_PLACEHOLDER
      containers:
        - image: REGION-docker.pkg.dev/PROJECT_ID/kimodo/kimodo:latest
          command: ["python", "-m", "kimodo.scripts.run_text_encoder_server"]
          ports:
            - containerPort: 9550
          # Without an explicit probe, Cloud Run's default TCP startup probe
          # allows 240 s for the port to open. This probe polls the serving
          # port once per second for up to 30 minutes instead.
          # NOTE(review): presumably the encoder loads model weights before
          # listening; tune failureThreshold to the measured startup time.
          startupProbe:
            tcpSocket:
              port: 9550
            periodSeconds: 1
            timeoutSeconds: 1
            failureThreshold: 1800
          resources:
            limits:
              cpu: "8"
              memory: 24Gi
              nvidia.com/gpu: "GPU_COUNT_PLACEHOLDER"
          env:
            # Bind address: all interfaces.
            - name: GRADIO_SERVER_NAME
              value: "0.0.0.0"
            - name: TEXT_ENCODER
              value: "llm2vec"
            # Quoted so the container receives the string "true".
            - name: LOCAL_CACHE
              value: "true"
            - name: HF_HOME
              value: /workspace/.cache/huggingface
            - name: PYTHONUNBUFFERED
              value: "1"
            # The same secret (version "latest") is exposed under four
            # variable names.
            # NOTE(review): presumably so that every Hugging Face client in
            # the image finds the token under the name it expects — confirm
            # which of these are actually read.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HF_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HUGGINGFACEHUB_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
  # Route all traffic to the newest ready revision.
  traffic:
    - percent: 100
      latestRevision: true