# Spaces:
# Running on Zero
# Running on Zero
# File size: 1,972 Bytes
---
# Knative Service manifest (with Cloud Run annotations) that runs the
# kimodo text-encoder server as the `movimento-text-encoder` service.
#
# The upper-case tokens REGION, PROJECT_ID, GPU_TYPE_PLACEHOLDER,
# GPU_COUNT_PLACEHOLDER and HF_TOKEN_SECRET_NAME look like template
# placeholders; presumably they are substituted before deployment.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: movimento-text-encoder
  annotations:
    run.googleapis.com/launch-stage: GA
spec:
  template:
    metadata:
      annotations:
        # minScale == maxScale: the autoscaling range is exactly one instance.
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        run.googleapis.com/execution-environment: gen2
        # NOTE(review): Cloud Run's documented way to pick a GPU type is
        # spec.template.spec.nodeSelector["run.googleapis.com/accelerator"];
        # confirm that the deploy tooling understands this annotation.
        run.googleapis.com/gpu-type: GPU_TYPE_PLACEHOLDER
        run.googleapis.com/gpu-zonal-redundancy-disabled: "true"
    spec:
      # One in-flight request per instance, with a 900 s request timeout.
      containerConcurrency: 1
      timeoutSeconds: 900
      containers:
        - image: REGION-docker.pkg.dev/PROJECT_ID/kimodo/kimodo:latest
          command: ["python", "-m", "kimodo.scripts.run_text_encoder_server"]
          ports:
            - containerPort: 9550
          resources:
            limits:
              cpu: "8"
              memory: 24Gi
              nvidia.com/gpu: "GPU_COUNT_PLACEHOLDER"
          env:
            - name: GRADIO_SERVER_NAME
              value: "0.0.0.0"
            - name: TEXT_ENCODER
              value: "llm2vec"
            - name: LOCAL_CACHE
              value: "true"
            - name: HF_HOME
              value: /workspace/.cache/huggingface
            - name: PYTHONUNBUFFERED
              value: "1"
            # The same secret (key `latest`) is exposed under four variable
            # names; presumably each is read by a different Hugging Face
            # client library -- verify before removing any of them.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HF_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HUGGINGFACEHUB_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
  # Send all traffic to the most recently created revision.
  traffic:
    - percent: 100
      latestRevision: true