Spaces:

Reza2kn
/

mega-asr-bench

Running

App Files Files Community

Reza2kn committed on about 20 hours ago

Commit

ed37d13

verified ·

1 Parent(s): 9e54b79

fp16: use canonical u16 bit-pattern viewed as Float16Array; diagnostic top-5 dump

Browse files

Files changed (1) hide show

mega-asr.js +32 -8

mega-asr.js CHANGED Viewed

@@ -377,6 +377,21 @@ async function transcribe({ mel, dims, T_mel }) {
   setStatus("decoding ...");
   let logits = prefillOut.logits.data; // (1, L, VOCAB)
   const logitsDims = prefillOut.logits.dims;
   // get argmax of last token
   let nid = argmax(logits, (logitsDims[1] - 1) * VOCAB, VOCAB);
   const gen = [nid];
@@ -451,19 +466,28 @@ function f32ToF16Bits(v) {
   return (sign << 15) | (newExp << 10) | (frac >> 13);
 }
-// onnxruntime-web 1.20+ wants fp16 data as Float16Array (Chrome 134+); fall
-// back to Uint16Array bit-pattern path on older engines (ORT will treat it
-// as raw fp16 bytes).
 const HAS_F16 = typeof Float16Array !== "undefined";
 function floatArrayToFp16(arr) {
-  if (HAS_F16) {
-    const out = new Float16Array(arr.length);
-    for (let i = 0; i < arr.length; i++) out[i] = arr[i];
-    return out;
-  }
   const u16 = new Uint16Array(arr.length);
   for (let i = 0; i < arr.length; i++) u16[i] = f32ToF16Bits(arr[i]);
   return u16;
 }

   setStatus("decoding ...");
   let logits = prefillOut.logits.data; // (1, L, VOCAB)
   const logitsDims = prefillOut.logits.dims;
+  // Diagnostic: dump top-5 of last logit so we can see what the decoder predicted
+  {
+    const off = (logitsDims[1] - 1) * VOCAB;
+    const idxs = [], vals = [];
+    for (let k = 0; k < 5; k++) {
+      let best = -Infinity, bi = -1;
+      for (let i = 0; i < VOCAB; i++) {
+        if (idxs.includes(i)) continue;
+        const v = logits[off + i];
+        if (v > best) { best = v; bi = i; }
+      }
+      idxs.push(bi); vals.push(best);
+    }
+    log(`prefill top-5: ${idxs.map((i, k) => `${i}(${vals[k].toFixed(2)})`).join(" ")}`);
+  }
   // get argmax of last token
   let nid = argmax(logits, (logitsDims[1] - 1) * VOCAB, VOCAB);
   const gen = [nid];
   return (sign << 15) | (newExp << 10) | (frac >> 13);
 }
+// Build fp16 storage: every value is first encoded to its fp16 bit pattern in
+// a Uint16Array via f32ToF16Bits. If the engine defines Float16Array, a
+// Float16Array view over that same buffer is returned; otherwise the raw
+// Uint16Array is returned. Diagnostics: the first call on the Float16Array path logs the first few converted values.
 const HAS_F16 = typeof Float16Array !== "undefined";
+let _f16_diag_count = 0; // 0 until the one-time "fp16 sanity" log line has been emitted
 function floatArrayToFp16(arr) {
+  // Encode each element as a u16 fp16 bit pattern. NOTE(review): the original comment claimed round-to-nearest-even, but the visible tail of f32ToF16Bits ends in `frac >> 13` — confirm the rounding mode.
   const u16 = new Uint16Array(arr.length);
   for (let i = 0; i < arr.length; i++) u16[i] = f32ToF16Bits(arr[i]);
+  if (HAS_F16) {
+    // View the same buffer (a view, not a copy) as Float16Array; intended to make ORT's type validation pass.
+    const f16 = new Float16Array(u16.buffer, u16.byteOffset, u16.length);
+    if (_f16_diag_count === 0) {
+      _f16_diag_count = 1;
+      const sample = [];
+      for (let i = 0; i < Math.min(5, arr.length); i++) sample.push(arr[i].toFixed(4) + "->" + f16[i].toFixed(4));
+      log(`fp16 sanity: ${sample.join("  ")}`);
+    }
+    return f16;
+  }
   return u16; // no Float16Array in this engine: return the raw bit patterns
 }