Cortiq-Labs
/

IconClip-ViT-L-14-text-encoder-ONNX

@@ -276,15 +276,24 @@ import { AutoTokenizer, AutoModel } from "@huggingface/transformers";
 const repo = "Cortiq-Labs/IconClip-ViT-L-14-text-encoder-ONNX";
 const tokenizer = await AutoTokenizer.from_pretrained(repo);
-const model = await AutoModel.from_pretrained(repo, { quantized: true });
-const enc = await tokenizer("shopping cart", {
   padding: "max_length",
-  truncation: true,
   max_length: 77,
 });
-const { embeddings } = await model({ input_ids: enc.input_ids });
-// embeddings: Float32Array(768), L2-normalised
 ```
 For best results, enable cross-origin isolation (`COOP: same-origin` +

 const repo = "Cortiq-Labs/IconClip-ViT-L-14-text-encoder-ONNX";
 const tokenizer = await AutoTokenizer.from_pretrained(repo);
+const model = await AutoModel.from_pretrained(repo, {
+  dtype: "q8",
+  device: "wasm",  // or "webgpu" where supported
+});
+// CLIP text encoders use a fixed 77-token context window — pad every input
+// to that length. This matches OpenAI CLIP, LAION CLIP, and every other
+// CLIP-family ONNX export on HF.
+const enc = await tokenizer(["shopping cart"], {
   padding: "max_length",
   max_length: 77,
+  truncation: true,
 });
+const out = await model(enc);
+// The ONNX graph exposes the projected 768-d output under the `embeddings` key.
+// transformers.js v4 loads this CLIPTextModel config through its EncoderOnly
+// fallback, which returns the q8 quantized graph's output under that name.
+const vec = out.embeddings.data;  // Float32Array(768), L2-normalised
 ```
 For best results, enable cross-origin isolation (`COOP: same-origin` +