---
# Knative Service manifest (Cloud Run) for the "movimento-text-encoder"
# service, which runs the kimodo text-encoder server on a GPU instance.
#
# NOTE(review): REGION, PROJECT_ID, GPU_TYPE_PLACEHOLDER,
# GPU_COUNT_PLACEHOLDER and HF_TOKEN_SECRET_NAME look like template tokens
# that must be substituted before this file is applied -- confirm against the
# deploy tooling.
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
  name: movimento-text-encoder
  annotations:
    run.googleapis.com/launch-stage: GA
spec:
  template:
    metadata:
      annotations:
        # minScale == maxScale == 1 pins the revision to one instance.
        autoscaling.knative.dev/minScale: "1"
        autoscaling.knative.dev/maxScale: "1"
        run.googleapis.com/execution-environment: gen2
        # NOTE(review): Cloud Run's documented GPU YAML selects the GPU type
        # through a nodeSelector (run.googleapis.com/accelerator) -- confirm
        # that this annotation is honored, or rewritten by the deploy tooling.
        run.googleapis.com/gpu-type: GPU_TYPE_PLACEHOLDER
        run.googleapis.com/gpu-zonal-redundancy-disabled: "true"
    spec:
      # One in-flight request per instance; requests may run for up to
      # 900 seconds (15 minutes).
      containerConcurrency: 1
      timeoutSeconds: 900
      containers:
        # NOTE(review): ":latest" is a mutable tag -- consider pinning a
        # version tag or digest so that revisions are reproducible.
        - image: REGION-docker.pkg.dev/PROJECT_ID/kimodo/kimodo:latest
          command: ["python", "-m", "kimodo.scripts.run_text_encoder_server"]
          ports:
            - containerPort: 9550
          resources:
            limits:
              cpu: "8"
              memory: 24Gi
              nvidia.com/gpu: "GPU_COUNT_PLACEHOLDER"
          env:
            - name: GRADIO_SERVER_NAME
              value: "0.0.0.0"
            - name: TEXT_ENCODER
              value: "llm2vec"
            - name: LOCAL_CACHE
              value: "true"
            - name: HF_HOME
              value: /workspace/.cache/huggingface
            - name: PYTHONUNBUFFERED
              value: "1"
            # The same secret is exposed under four variable names,
            # presumably because different Hugging Face clients read
            # different ones -- confirm which are actually needed. In
            # Cloud Run YAML, "key" is the secret version.
            - name: HF_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HUGGING_FACE_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HF_HUB_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
            - name: HUGGINGFACEHUB_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: HF_TOKEN_SECRET_NAME
                  key: latest
  traffic:
    # Route all traffic to the most recently created revision.
    - percent: 100
      latestRevision: true