Enable Baseten-Embeddings-Inference
#11
by baseten-admin - opened
No description provided.
baseten-admin changed pull request status to closed
baseten-admin changed pull request status to open
Runs with improved batching and compilation:
# Baseten (truss) deployment config for a TensorRT-LLM BEI embedding model.
# NOTE(review): key names follow the consuming tool's schema — do not reorder
# or restyle without checking against the Baseten config reference.
model_metadata:
  # Sample request body in OpenAI-embeddings format, used for documentation/smoke tests.
  example_model_input:
    encoding_format: float
    input: text string
    model: model
model_name: BEI-Bert-pplx-embed
resources:
  accelerator: H100
  # CPU count is intentionally a quoted string ('1'), matching the schema's expected type.
  cpu: '1'
  memory: 10Gi
  use_gpu: true
trt_llm:
  build:
    base_model: encoder_bert
    checkpoint_repository:
      # Weights pulled from the Hugging Face Hub; `revision` pins an open PR ref
      # (refs/pr/11) rather than a branch or tag — presumably the PR that adds
      # the required artifacts; switch to a commit SHA or tag once merged.
      repo: perplexity-ai/pplx-embed-v1-0.6b
      revision: refs/pr/11
      source: HF
    # Upper bound on tokens per batch for the TRT-LLM engine build.
    max_num_tokens: 32768
  runtime:
    # Serve the OpenAI-compatible embeddings endpoint as the default route.
    webserver_default_route: /v1/embeddings