barakplasma
/

sapiens2-onnx

@@ -1,92 +1,139 @@
 /**
  * example_embeddings.js
  *
- * Demonstrates loading sapiens2-0.1b ONNX and generating image embeddings
- * in Node.js. Compares two images and finds the most similar image in a set.
  *
- * Requirements:
- *   npm install onnxruntime-node sharp
  *
  * Usage:
- *   node example_embeddings.js image_a.jpg image_b.jpg [image_c.jpg ...]
- *
- *   First two images are compared directly.
- *   If more images are supplied, the image most similar to image_a.jpg is found.
  */
-import * as ort from "onnxruntime-node";
-import sharp from "sharp";
-import path from "path";
-import { fileURLToPath } from "url";
-// ── Config ────────────────────────────────────────────────────────────────────
-const MODEL_PATH = path.join(
-  path.dirname(fileURLToPath(import.meta.url)),
-  "sapiens2_0.1b_int8.onnx"
-);
 const H = 1024;
 const W = 768;
 const MEAN = [0.485, 0.456, 0.406];
 const STD  = [0.229, 0.224, 0.225];
-// ── Core functions ────────────────────────────────────────────────────────────
 /**
- * Load the ONNX inference session. Reuse the returned session for all images —
- * loading takes ~1-2 s and should only happen once.
  */
-export async function loadModel(modelPath = MODEL_PATH) {
-  return ort.InferenceSession.create(modelPath, {
-    executionProviders: ["cpu"],
     graphOptimizationLevel: "all",
   });
 }
 /**
- * Read an image from disk, resize to 1024×768, and convert to a float32
- * NCHW tensor with ImageNet normalization.
  *
- * @param {string} imagePath  Path to any image format supported by sharp.
- * @returns {ort.Tensor}      Shape (1, 3, 1024, 768).
  */
-export async function imageToTensor(imagePath) {
-  const { data } = await sharp(imagePath)
-    .resize(W, H)       // sharp uses (width, height)
-    .removeAlpha()      // drop alpha if present
-    .raw()              // uncompressed RGB bytes
-    .toBuffer({ resolveWithObject: true });
   const t = new Float32Array(3 * H * W);
   for (let i = 0; i < H * W; i++) {
-    t[i]             = (data[i * 3]     / 255 - MEAN[0]) / STD[0]; // R
-    t[H * W + i]     = (data[i * 3 + 1] / 255 - MEAN[1]) / STD[1]; // G
-    t[2 * H * W + i] = (data[i * 3 + 2] / 255 - MEAN[2]) / STD[2]; // B
   }
   return new ort.Tensor("float32", t, [1, 3, H, W]);
 }
 /**
- * Run the model on a single image and return its 768-dimensional embedding.
  *
- * @param {ort.InferenceSession} session
- * @param {string}               imagePath
- * @returns {Float32Array}  Length 768.
  */
-export async function embed(session, imagePath) {
-  const tensor = await imageToTensor(imagePath);
-  const { embedding } = await session.run({ pixel_values: tensor });
-  return embedding.data; // Float32Array
 }
 /**
- * Cosine similarity between two equal-length Float32Arrays.
- * Returns a value in [-1, 1]: 1 = identical direction, 0 = orthogonal, -1 = opposite.
  */
 export function cosineSimilarity(a, b) {
-  if (a.length !== b.length) throw new Error("Embedding length mismatch");
   let dot = 0, normA = 0, normB = 0;
   for (let i = 0; i < a.length; i++) {
     dot   += a[i] * b[i];
@@ -97,8 +144,11 @@ export function cosineSimilarity(a, b) {
 }
 /**
- * L2-normalize an embedding in place. After normalization you can use a simple
- * dot product instead of cosine similarity, which is faster for large databases.
  */
 export function l2Normalize(v) {
   let norm = 0;
@@ -110,60 +160,17 @@ export function l2Normalize(v) {
 }
 /**
- * Given a query embedding and an array of candidate embeddings, return the
- * index of the most similar candidate and its similarity score.
  */
 export function findMostSimilar(query, candidates) {
-  let bestIdx = -1;
-  let bestScore = -Infinity;
   for (let i = 0; i < candidates.length; i++) {
     const score = cosineSimilarity(query, candidates[i]);
-    if (score > bestScore) {
-      bestScore = score;
-      bestIdx   = i;
-    }
   }
   return { index: bestIdx, score: bestScore };
 }
-// ── Demo ──────────────────────────────────────────────────────────────────────
-async function main() {
-  const args = process.argv.slice(2);
-  if (args.length < 2) {
-    console.error("Usage: node example_embeddings.js image_a.jpg image_b.jpg [more...]");
-    process.exit(1);
-  }
-  console.log("Loading model...");
-  const session = await loadModel();
-  console.log("Model loaded.\n");
-  // Embed all provided images
-  const embeddings = [];
-  for (const imgPath of args) {
-    process.stdout.write(`Embedding ${path.basename(imgPath)}... `);
-    const t0 = Date.now();
-    const emb = await embed(session, imgPath);
-    console.log(`done (${Date.now() - t0} ms, dim=${emb.length})`);
-    embeddings.push(emb);
-  }
-  // Compare first two images
-  const [a, b] = args;
-  const score = cosineSimilarity(embeddings[0], embeddings[1]);
-  console.log(`\nCosine similarity between ${path.basename(a)} and ${path.basename(b)}: ${score.toFixed(4)}`);
-  console.log(score > 0.85 ? "  → Very similar" : score > 0.6 ? "  → Somewhat similar" : "  → Dissimilar");
-  // If more than 2 images: find the most similar to the first
-  if (args.length > 2) {
-    const candidates = embeddings.slice(1);
-    const { index, score: bestScore } = findMostSimilar(embeddings[0], candidates);
-    console.log(
-      `\nMost similar to ${path.basename(args[0])}: ${path.basename(args[index + 1])} ` +
-      `(score=${bestScore.toFixed(4)})`
-    );
-  }
-}
-main().catch(err => { console.error(err); process.exit(1); });

 /**
  * example_embeddings.js
  *
+ * Drop-in ES module for browser use. Exports:
+ *   loadModelCached(url?)    — load and cache model in IndexedDB
+ *   embed(session, source)   — get 768-dim Float32Array from any image source
+ *   cosineSimilarity(a, b)   — similarity score in [-1, 1]
+ *   l2Normalize(v)           — normalize so dot product equals cosine similarity
+ *   findMostSimilar(q, list) — nearest-neighbor in an embedding array
  *
+ * Requirements: onnxruntime-web (npm install onnxruntime-web)
  *
  * Usage:
+ *   import { loadModelCached, embed, cosineSimilarity } from "./example_embeddings.js";
+ *   const session = await loadModelCached();
+ *   const emb = await embed(session, document.getElementById("myImage"));
  */
+import * as ort from "onnxruntime-web";
+// ── Config ─────────────────────────────────────────────────────────────────
+ort.env.wasm.wasmPaths = "https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/"; // URL prefix the runtime loads its .wasm files from. NOTE(review): the URL carries no version — confirm it matches the installed onnxruntime-web.
+const MODEL_URL = // default model location; loadModelCached also uses the URL as its IndexedDB key
+  "https://huggingface.co/barakplasma/sapiens2-onnx/resolve/main/sapiens2_0.1b_int8.onnx";
 const H = 1024;
 const W = 768;
 const MEAN = [0.485, 0.456, 0.406];
 const STD  = [0.229, 0.224, 0.225];
+const DB_NAME = "sapiens2-onnx"; // database name passed to indexedDB.open in openDB
+const DB_STORE = "models"; // object store created in openDB and used by idbGet / idbPut
+// ── IndexedDB helpers ──────────────────────────────────────────────────────
+/**
+ * Open the model-cache database at version 1, creating its object store when
+ * the database is first set up.
+ *
+ * @returns {Promise<IDBDatabase>} Resolves with the open connection; rejects
+ *   with the open request's error if the database cannot be opened.
+ */
+function openDB() {
+  return new Promise((resolve, reject) => {
+    const openRequest = indexedDB.open(DB_NAME, 1);
+    openRequest.onerror = () => {
+      reject(openRequest.error);
+    };
+    openRequest.onsuccess = () => {
+      resolve(openRequest.result);
+    };
+    openRequest.onupgradeneeded = () => {
+      // Fires before onsuccess when the database has to be created.
+      openRequest.result.createObjectStore(DB_STORE);
+    };
+  });
+}
+/**
+ * Best-effort read of a single entry from the model store.
+ *
+ * @param {IDBDatabase} db   Open database connection.
+ * @param {IDBValidKey} key  Key to look up.
+ * @returns {Promise<*>} The stored value, or null when nothing is stored under
+ *   `key` or the read request fails.
+ */
+function idbGet(db, key) {
+  return new Promise((resolve) => {
+    const store = db.transaction(DB_STORE).objectStore(DB_STORE);
+    const getRequest = store.get(key);
+    // A failed read is reported exactly like a cache miss.
+    getRequest.onerror = () => {
+      resolve(null);
+    };
+    getRequest.onsuccess = () => {
+      const { result } = getRequest;
+      resolve(result ?? null);
+    };
+  });
+}
+/**
+ * Store `value` under `key` in the model store.
+ *
+ * The promise settles on the transaction, not on the put request: a request
+ * can report success and the transaction still abort afterwards (for example
+ * when the origin's storage quota is exceeded), in which case nothing is
+ * persisted.
+ *
+ * @param {IDBDatabase} db     Open database connection.
+ * @param {IDBValidKey} key    Key to store the value under.
+ * @param {*}           value  Structured-cloneable value to store.
+ * @returns {Promise<void>} Resolves once the write transaction has completed;
+ *   rejects with the transaction's error if it aborts.
+ */
+function idbPut(db, key, value) {
+  return new Promise((resolve, reject) => {
+    const tx = db.transaction(DB_STORE, "readwrite");
+    tx.oncomplete = () => resolve();
+    // An unhandled request error aborts the transaction, so this one handler
+    // covers both request failures and commit failures. tx.error is null only
+    // when abort() was called explicitly.
+    tx.onabort = () => reject(tx.error ?? new Error("IndexedDB transaction aborted"));
+    tx.objectStore(DB_STORE).put(value, key);
+  });
+}
+// ── Public API ─────────────────────────────────────────────────────────────
 /**
+ * Load the ONNX model and create an inference session for it.
+ *
+ * The model bytes are looked up in IndexedDB under `url`. On a miss they are
+ * fetched from `url` and written back to IndexedDB so later calls can skip the
+ * download. Caching is best-effort: if IndexedDB cannot be opened or the write
+ * fails, a warning is logged and the model is still loaded from the network.
+ *
+ * @param {string} [url]  Override the default model URL.
+ * @returns {Promise<ort.InferenceSession>}
+ * @throws {Error} If the model has to be downloaded and the response status is
+ *   not OK.
  */
+export async function loadModelCached(url = MODEL_URL) {
+  // The cache is only an optimization: without IndexedDB, fall back to a plain
+  // network load instead of failing.
+  let db = null;
+  try {
+    db = await openDB();
+  } catch (err) {
+    console.warn("IndexedDB unavailable; the model will not be cached.", err);
+  }
+
+  let buf = null;
+  try {
+    if (db) {
+      buf = await idbGet(db, url);
+    }
+    if (buf == null) {
+      const res = await fetch(url);
+      if (!res.ok) throw new Error(`Failed to fetch model: ${res.status} ${res.statusText}`);
+      buf = await res.arrayBuffer();
+      if (db) {
+        try {
+          await idbPut(db, url, buf);
+        } catch (err) {
+          // The downloaded bytes are still usable even if they cannot be stored.
+          console.warn("Could not cache the model in IndexedDB.", err);
+        }
+      }
+    }
+  } finally {
+    // Release the connection opened above; it is not needed past this point.
+    db?.close();
+  }
+
+  return ort.InferenceSession.create(buf, {
+    executionProviders: ["webgpu", "wasm"],
     graphOptimizationLevel: "all",
   });
 }
 /**
+ * Rasterize an image source and pack it into a planar float32 tensor with
+ * ImageNet normalization. The source is stretched to exactly 768×1024 on an
+ * off-screen canvas, and the alpha byte of every pixel is dropped.
+ * Accepts anything drawImage() accepts: <img>, <canvas>, ImageBitmap, VideoFrame.
  *
+ * @param {HTMLImageElement|HTMLCanvasElement|ImageBitmap|VideoFrame} source
+ * @returns {ort.Tensor}  float32, shape (1, 3, 1024, 768), channel order R, G, B.
  */
+export function imageToTensor(source) {
+  const scratch = document.createElement("canvas");
+  scratch.width  = W;
+  scratch.height = H;
+  const ctx2d = scratch.getContext("2d");
+  ctx2d.drawImage(source, 0, 0, W, H);
+  const rgba = ctx2d.getImageData(0, 0, W, H).data; // 4 bytes per pixel: R, G, B, A
+
+  const plane = H * W;
+  const chw = new Float32Array(3 * plane);
+  for (let px = 0; px < plane; px++) {
+    const base = px * 4;
+    // Interleaved RGBA in, one full plane per channel out.
+    for (let c = 0; c < 3; c++) {
+      chw[c * plane + px] = (rgba[base + c] / 255 - MEAN[c]) / STD[c];
+    }
+  }
+  return new ort.Tensor("float32", chw, [1, 3, H, W]);
 }
 /**
+ * Embed a single image: convert it to the model's input tensor, run the
+ * session, and return the data of the output named `embedding`.
  *
+ * @param {ort.InferenceSession}                                      session
+ * @param {HTMLImageElement|HTMLCanvasElement|ImageBitmap|VideoFrame} source
+ * @returns {Promise<Float32Array>}  Length 768.
  */
+export async function embed(session, source) {
+  const pixelValues = imageToTensor(source);
+  const outputs = await session.run({ pixel_values: pixelValues });
+  return outputs.embedding.data;
 }
 /**
+ * Cosine similarity between two embeddings.
+ * Returns a value in [-1, 1]: 1 = identical direction, 0 = orthogonal.
+ *
+ * @param {Float32Array} a
+ * @param {Float32Array} b
+ * @returns {number}
  */
 export function cosineSimilarity(a, b) {
   let dot = 0, normA = 0, normB = 0;
   for (let i = 0; i < a.length; i++) {
     dot   += a[i] * b[i];
 }
 /**
+ * L2-normalize an embedding. After normalizing all vectors in your database,
+ * you can use a plain dot product instead of cosine similarity (faster at scale).
+ *
+ * @param {Float32Array} v
+ * @returns {Float32Array}
  */
 export function l2Normalize(v) {
   let norm = 0;
 }
 /**
+ * Linear scan for the candidate with the highest cosine similarity to `query`.
+ * Ties keep the earliest candidate; an empty list yields
+ * `{ index: -1, score: -Infinity }`.
+ *
+ * @param {Float32Array}   query
+ * @param {Float32Array[]} candidates
+ * @returns {{ index: number, score: number }}
  */
 export function findMostSimilar(query, candidates) {
+  let winner = -1;
+  let winnerScore = -Infinity;
+  for (const [position, candidate] of candidates.entries()) {
+    const similarity = cosineSimilarity(query, candidate);
+    if (similarity > winnerScore) {
+      winner = position;
+      winnerScore = similarity;
+    }
+  }
+  return { index: winner, score: winnerScore };
 }