Spaces:
Running
Running
fp16: use canonical u16 bit-pattern viewed as Float16Array; diagnostic top-5 dump
Browse files- mega-asr.js +32 -8
mega-asr.js
CHANGED
|
@@ -377,6 +377,21 @@ async function transcribe({ mel, dims, T_mel }) {
|
|
| 377 |
setStatus("decoding ...");
|
| 378 |
let logits = prefillOut.logits.data; // (1, L, VOCAB)
|
| 379 |
const logitsDims = prefillOut.logits.dims;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
// get argmax of last token
|
| 381 |
let nid = argmax(logits, (logitsDims[1] - 1) * VOCAB, VOCAB);
|
| 382 |
const gen = [nid];
|
|
@@ -451,19 +466,28 @@ function f32ToF16Bits(v) {
|
|
| 451 |
return (sign << 15) | (newExp << 10) | (frac >> 13);
|
| 452 |
}
|
| 453 |
|
| 454 |
-
//
|
| 455 |
-
//
|
| 456 |
-
// as
|
|
|
|
| 457 |
const HAS_F16 = typeof Float16Array !== "undefined";
|
|
|
|
| 458 |
|
| 459 |
function floatArrayToFp16(arr) {
|
| 460 |
-
|
| 461 |
-
const out = new Float16Array(arr.length);
|
| 462 |
-
for (let i = 0; i < arr.length; i++) out[i] = arr[i];
|
| 463 |
-
return out;
|
| 464 |
-
}
|
| 465 |
const u16 = new Uint16Array(arr.length);
|
| 466 |
for (let i = 0; i < arr.length; i++) u16[i] = f32ToF16Bits(arr[i]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 467 |
return u16;
|
| 468 |
}
|
| 469 |
|
|
|
|
| 377 |
setStatus("decoding ...");
|
| 378 |
let logits = prefillOut.logits.data; // (1, L, VOCAB)
|
| 379 |
const logitsDims = prefillOut.logits.dims;
|
| 380 |
+
// Diagnostic: dump top-5 of last logit so we can see what the decoder predicted
|
| 381 |
+
{
|
| 382 |
+
const off = (logitsDims[1] - 1) * VOCAB;
|
| 383 |
+
const idxs = [], vals = [];
|
| 384 |
+
for (let k = 0; k < 5; k++) {
|
| 385 |
+
let best = -Infinity, bi = -1;
|
| 386 |
+
for (let i = 0; i < VOCAB; i++) {
|
| 387 |
+
if (idxs.includes(i)) continue;
|
| 388 |
+
const v = logits[off + i];
|
| 389 |
+
if (v > best) { best = v; bi = i; }
|
| 390 |
+
}
|
| 391 |
+
idxs.push(bi); vals.push(best);
|
| 392 |
+
}
|
| 393 |
+
log(`prefill top-5: ${idxs.map((i, k) => `${i}(${vals[k].toFixed(2)})`).join(" ")}`);
|
| 394 |
+
}
|
| 395 |
// get argmax of last token
|
| 396 |
let nid = argmax(logits, (logitsDims[1] - 1) * VOCAB, VOCAB);
|
| 397 |
const gen = [nid];
|
|
|
|
| 466 |
return (sign << 15) | (newExp << 10) | (frac >> 13);
|
| 467 |
}
|
| 468 |
|
| 469 |
+
// Build fp16 storage: always convert into a Uint16Array of fp16 bit patterns
|
| 470 |
+
// first. If the browser has Float16Array, return a Float16Array view of that
|
| 471 |
+
// same buffer (ORT 1.20 validates the constructor); otherwise return the
|
| 472 |
+
// Uint16Array itself. Diagnostics: the first few converted values are also
// dumped once so we can spot conversion errors.
|
| 473 |
// True when this runtime exposes a global Float16Array constructor.
const HAS_F16 = typeof Float16Array !== "undefined";
|
| 474 |
+
// One-shot guard for the "fp16 sanity" log in floatArrayToFp16: stays 0 until
// that log has been emitted, then is set to 1 so it is never repeated.
let _f16_diag_count = 0;
|
| 475 |
|
| 476 |
// Convert a numeric array to fp16 storage.
// Every element is passed through f32ToF16Bits and stored in a Uint16Array of
// the same length. When HAS_F16 is true the result is returned as a
// Float16Array view over that same buffer (no copy); otherwise the Uint16Array
// of raw bit patterns is returned.
// Side effect: the first call that takes the HAS_F16 path logs up to five
// "input->fp16" samples via log() and sets _f16_diag_count to 1 so the log is
// emitted only once.
// arr: indexable sequence with a numeric .length; its elements need .toFixed
//      on the diagnostic path (presumably a Float32Array - confirm with callers).
function floatArrayToFp16(arr) {
|
| 477 |
+
// Fill the u16 buffer with explicit fp16 bit patterns. Rounding is whatever
// f32ToF16Bits implements; the original note here said "canonical
// round-to-nearest-even" - NOTE(review): confirm against the full body of
// f32ToF16Bits, which is not visible from this function.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 478 |
const u16 = new Uint16Array(arr.length);
|
| 479 |
for (let i = 0; i < arr.length; i++) u16[i] = f32ToF16Bits(arr[i]);
|
| 480 |
+
if (HAS_F16) {
|
| 481 |
+
// Reinterpret the same bytes as a Float16Array (shares u16.buffer, no copy)
// so ORT's type validation passes.
|
| 482 |
+
const f16 = new Float16Array(u16.buffer, u16.byteOffset, u16.length);
|
| 483 |
+
// Emit the sanity sample only on the first call that reaches this branch.
if (_f16_diag_count === 0) {
|
| 484 |
+
_f16_diag_count = 1;
|
| 485 |
+
const sample = [];
|
| 486 |
+
// Reading f16[i] decodes the stored bits through the native Float16Array,
// so a mismatch against arr[i] exposes an error in the manual conversion.
for (let i = 0; i < Math.min(5, arr.length); i++) sample.push(arr[i].toFixed(4) + "->" + f16[i].toFixed(4));
|
| 487 |
+
log(`fp16 sanity: ${sample.join(" ")}`);
|
| 488 |
+
}
|
| 489 |
+
return f16;
|
| 490 |
+
}
|
| 491 |
// No Float16Array in this runtime: hand back the raw fp16 bit patterns.
return u16;
|
| 492 |
}
|
| 493 |
|