Image Feature Extraction
Transformers
ONNX
sapiens
sapiens2
vision-transformer
human-centric
feature-extraction
onnxruntime-web
Instructions to use barakplasma/sapiens2-onnx with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use barakplasma/sapiens2-onnx with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-feature-extraction", model="barakplasma/sapiens2-onnx")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("barakplasma/sapiens2-onnx", dtype="auto")
- sapiens
How to use barakplasma/sapiens2-onnx with sapiens:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- sapiens2
How to use barakplasma/sapiens2-onnx with sapiens2:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
File size: 6,147 Bytes
/**
* example_embeddings.js
*
* Drop-in ES module for browser use. Exports:
* loadModelCached(url?) — load and cache model in IndexedDB
* embed(session, source) — get 768-dim Float32Array from any image source
* cosineSimilarity(a, b) — similarity score in [-1, 1]
* l2Normalize(v) — normalize so dot product equals cosine similarity
* findMostSimilar(q, list) — nearest-neighbor in an embedding array
*
* Requirements: onnxruntime-web (npm install onnxruntime-web)
*
* Usage:
* import { loadModelCached, embed, cosineSimilarity } from "./example_embeddings.js";
* const session = await loadModelCached();
* const emb = await embed(session, document.getElementById("myImage"));
*/
import * as ort from "onnxruntime-web";
// ── Config ─────────────────────────────────────────────────────────────────
// Base URL from which onnxruntime-web is told to load its WASM assets.
// NOTE(review): the URL carries no version, so the CDN presumably serves the
// latest release; confirm it matches the installed onnxruntime-web version.
ort.env.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/";
// Default model file fetched by loadModelCached() when no URL is passed.
const MODEL_URL =
"https://huggingface.co/barakplasma/sapiens2-onnx/resolve/main/sapiens2_0.1b_int8.onnx";
// Model input height and width in pixels; imageToTensor() resizes every
// source to W x H before building the tensor.
const H = 1024;
const W = 768;
// Per-channel (R, G, B) mean and standard deviation used by imageToTensor()
// for ImageNet normalization of pixel values scaled to [0, 1].
const MEAN = [0.485, 0.456, 0.406];
const STD = [0.229, 0.224, 0.225];
// IndexedDB database name and object-store name used for the model cache.
const DB_NAME = "sapiens2-onnx";
const DB_STORE = "models";
// ── IndexedDB helpers ──────────────────────────────────────────────────────
/**
 * Open (and, on first use, initialise) the IndexedDB database that backs the
 * model cache.
 *
 * @returns {Promise<IDBDatabase>} Resolves with the open connection; rejects
 *   with the request's error if the database cannot be opened.
 */
function openDB() {
  const SCHEMA_VERSION = 1;
  return new Promise((fulfil, fail) => {
    const opening = indexedDB.open(DB_NAME, SCHEMA_VERSION);

    opening.onerror = () => {
      fail(opening.error);
    };
    opening.onsuccess = () => {
      fulfil(opening.result);
    };
    // Runs only when the database is created or its version is bumped:
    // create the single object store the cache uses.
    opening.onupgradeneeded = () => {
      const freshDb = opening.result;
      freshDb.createObjectStore(DB_STORE);
    };
  });
}
/**
 * Read one value from the cache store.
 *
 * A failed read is treated as a cache miss rather than an error, so the
 * returned promise resolves with null in that case.
 *
 * @param {IDBDatabase} db Open database connection.
 * @param {IDBValidKey} key Key to look up.
 * @returns {Promise<*>} The stored value, or null when absent or unreadable.
 */
function idbGet(db, key) {
  return new Promise((settle) => {
    const store = db.transaction(DB_STORE).objectStore(DB_STORE);
    const lookup = store.get(key);

    lookup.onerror = () => {
      settle(null);
    };
    lookup.onsuccess = () => {
      const { result } = lookup;
      // Normalise "not found" (undefined) to null.
      settle(result === undefined || result === null ? null : result);
    };
  });
}
/**
 * Store a value in the cache store under the given key.
 *
 * The promise settles on the transaction outcome, not on the put request:
 * a request's `success` event fires before the transaction commits, and the
 * commit can still abort (for example when the storage quota is exceeded).
 *
 * @param {IDBDatabase} db Open database connection.
 * @param {IDBValidKey} key Key to store the value under.
 * @param {*} value Value to store (here, the model's ArrayBuffer).
 * @returns {Promise<void>} Resolves once the write has been committed;
 *   rejects with the request or transaction error otherwise.
 */
function idbPut(db, key, value) {
  return new Promise((resolve, reject) => {
    const tx = db.transaction(DB_STORE, "readwrite");
    const req = tx.objectStore(DB_STORE).put(value, key);

    // Surface the most specific error when the put itself fails.
    req.onerror = () => reject(req.error);
    tx.oncomplete = () => resolve();
    // tx.error is null when the transaction was aborted explicitly.
    tx.onabort = () => reject(tx.error ?? new Error("IndexedDB transaction aborted"));
  });
}
// ── Public API ─────────────────────────────────────────────────────────────
/**
 * Load the ONNX model, using IndexedDB as a best-effort cache.
 *
 * On a cache miss the model is fetched from `url` and the resulting
 * ArrayBuffer is stored in IndexedDB under that URL; later calls with the same
 * URL read the buffer back from the cache instead of downloading it again.
 *
 * Caching is an optimisation only: if IndexedDB cannot be opened or the
 * buffer cannot be stored (e.g. storage quota), a warning is logged and the
 * model is still loaded from the network.
 *
 * @param {string} [url] Override the default model URL.
 * @returns {Promise<ort.InferenceSession>}
 * @throws {Error} If the download responds with a non-OK HTTP status.
 */
export async function loadModelCached(url = MODEL_URL) {
  let db = null;
  try {
    db = await openDB();
  } catch (err) {
    console.warn("Model cache unavailable; loading from network.", err);
  }

  let modelBytes = null;
  try {
    if (db !== null) {
      modelBytes = await idbGet(db, url);
    }
    if (modelBytes == null) {
      const response = await fetch(url);
      if (!response.ok) {
        throw new Error(`Failed to fetch model: ${response.status} ${response.statusText}`);
      }
      modelBytes = await response.arrayBuffer();
      if (db !== null) {
        try {
          await idbPut(db, url, modelBytes);
        } catch (err) {
          // A failed cache write must not discard a successful download.
          console.warn("Could not cache model in IndexedDB.", err);
        }
      }
    }
  } finally {
    // Release the connection so it cannot block later upgrades or deletion.
    db?.close();
  }

  return ort.InferenceSession.create(modelBytes, {
    executionProviders: ["webgpu", "wasm"],
    graphOptimizationLevel: "all",
  });
}
/**
 * Rasterize an image source at the model's input resolution and pack it into
 * a planar (NCHW) float32 tensor using ImageNet mean/std normalization.
 * Works with anything drawImage() accepts: <img>, <canvas>, ImageBitmap, VideoFrame.
 *
 * @param {HTMLImageElement|HTMLCanvasElement|ImageBitmap|VideoFrame} source
 * @returns {ort.Tensor} Shape (1, 3, 1024, 768).
 */
export function imageToTensor(source) {
  const surface = document.createElement("canvas");
  surface.width = W;
  surface.height = H;

  // Drawing into a W x H rectangle rescales the source to the input size.
  const painter = surface.getContext("2d");
  painter.drawImage(source, 0, 0, W, H);
  const rgba = painter.getImageData(0, 0, W, H).data; // interleaved RGBA bytes

  const planeSize = H * W;
  const planes = new Float32Array(3 * planeSize);
  // Fill one colour plane at a time (R, then G, then B); alpha is ignored.
  for (let channel = 0; channel < 3; channel++) {
    const offset = channel * planeSize;
    const mean = MEAN[channel];
    const std = STD[channel];
    for (let px = 0; px < planeSize; px++) {
      planes[offset + px] = (rgba[px * 4 + channel] / 255 - mean) / std;
    }
  }
  return new ort.Tensor("float32", planes, [1, 3, H, W]);
}
/**
 * Compute the embedding of a single image.
 *
 * The image is converted with imageToTensor(), fed to the session as the
 * `pixel_values` input, and the data of the `embedding` output is returned.
 *
 * @param {ort.InferenceSession} session
 * @param {HTMLImageElement|HTMLCanvasElement|ImageBitmap|VideoFrame} source
 * @returns {Promise<Float32Array>} Length 768.
 */
export async function embed(session, source) {
  const feeds = { pixel_values: imageToTensor(source) };
  const outputs = await session.run(feeds);
  return outputs.embedding.data;
}
/**
 * Cosine similarity between two embeddings.
 * Returns a value in [-1, 1]: 1 = identical direction, 0 = orthogonal.
 *
 * A zero-length-norm vector has no direction, so the similarity is defined
 * here as 0 instead of the NaN that 0/0 would produce.
 *
 * @param {Float32Array} a
 * @param {Float32Array} b
 * @returns {number}
 * @throws {RangeError} If the two embeddings differ in length.
 */
export function cosineSimilarity(a, b) {
  if (a.length !== b.length) {
    throw new RangeError(`Embedding length mismatch: ${a.length} vs ${b.length}`);
  }

  let dot = 0;
  let normA = 0;
  let normB = 0;
  for (let i = 0; i < a.length; i++) {
    dot += a[i] * b[i];
    normA += a[i] * a[i];
    normB += b[i] * b[i];
  }

  const denominator = Math.sqrt(normA) * Math.sqrt(normB);
  if (denominator === 0) {
    return 0;
  }
  // Rounding can push the ratio a hair past +/-1; clamp to the documented range.
  return Math.max(-1, Math.min(1, dot / denominator));
}
/**
 * L2-normalize an embedding. After normalizing all vectors in your database,
 * you can use a plain dot product instead of cosine similarity (faster at scale).
 *
 * The input is not modified. A vector whose norm is 0 cannot be scaled to
 * unit length; it is returned as an all-zero vector instead of all NaN.
 *
 * @param {Float32Array} v
 * @returns {Float32Array} New array of the same length as `v`.
 */
export function l2Normalize(v) {
  let sumOfSquares = 0;
  for (let i = 0; i < v.length; i++) {
    sumOfSquares += v[i] * v[i];
  }
  const norm = Math.sqrt(sumOfSquares);

  const out = new Float32Array(v.length); // zero-filled on creation
  if (norm === 0) {
    return out;
  }
  for (let i = 0; i < v.length; i++) {
    out[i] = v[i] / norm;
  }
  return out;
}
/**
 * Scan a list of embeddings for the one closest to `query` by cosine
 * similarity. On ties the earliest candidate wins. When no candidate scores
 * above -Infinity (for instance an empty list), index is -1 and score is
 * -Infinity.
 *
 * @param {Float32Array} query
 * @param {Float32Array[]} candidates
 * @returns {{ index: number, score: number }}
 */
export function findMostSimilar(query, candidates) {
  let winner = -1;
  let topScore = -Infinity;
  for (const [position, candidate] of candidates.entries()) {
    const similarity = cosineSimilarity(query, candidate);
    if (similarity > topScore) {
      winner = position;
      topScore = similarity;
    }
  }
  return { index: winner, score: topScore };
}
|