Spaces:

Reza2kn
/

mega-asr-bench

Running

App Files Community

Reza2kn committed 1 day ago

Commit

7be4ffd

verified ·

1 Parent(s): 50bb779

Per-step diagnostics: pinpoint which ORT call crashes

Browse files

Files changed (1) hide show

mega-asr.js +21 -9

mega-asr.js CHANGED Viewed

@@ -320,10 +320,18 @@ async function transcribe({ mel, dims, T_mel }) {
   if (!state.loaded) throw new Error("models not loaded");
   // 1. encode
   setStatus("audio encoder ...");
-  const melTensor = new ort.Tensor("float32", mel, dims);
-  const encOut = await state.encoder.run({ mel: melTensor });
-  // For WebGPU outputs we must await getData() to bring values back to CPU.
-  const audioEmbedsAll = await encOut.audio_embeds.getData(true); // Float32Array (1*390*2048,)
   const audioEmbedsDims = encOut.audio_embeds.dims; // [1, 390, 2048]
   const realChunks = Math.floor((T_mel + 99) / 100);
   const lastChunkMel = T_mel - (realChunks - 1) * 100;
@@ -367,11 +375,15 @@ async function transcribe({ mel, dims, T_mel }) {
   // 4. prefill
   setStatus("prefill ...");
   const t0 = performance.now();
-  const prefillOut = await state.prefill.run({
-    inputs_embeds: new ort.Tensor("float16", inputsEmbedsF16, [1, L, HIDDEN]),
-    attention_mask: new ort.Tensor("int64", attnMask, [1, L]),
-    position_ids: new ort.Tensor("int64", posIds, [1, L]),
-  });
   log(`prefill: ${(performance.now() - t0).toFixed(0)} ms (L=${L})`);
   // 5. greedy decode

   if (!state.loaded) throw new Error("models not loaded");
   // 1. encode
   setStatus("audio encoder ...");
+  let melTensor;
+  try { melTensor = new ort.Tensor("float32", mel, dims); }
+  catch (e) { log(`[step] Tensor ctor failed: ${e.message || e}`); throw e; }
+  log(`[step] running encoder ...`);
+  let encOut;
+  try { encOut = await state.encoder.run({ mel: melTensor }); }
+  catch (e) { log(`[step] encoder.run failed: ${e.message || e}`); throw e; }
+  log(`[step] encoder ok; reading audio_embeds ...`);
+  let audioEmbedsAll;
+  try { audioEmbedsAll = await encOut.audio_embeds.getData(true); }
+  catch (e) { log(`[step] getData failed: ${e.message || e}`); throw e; }
+  log(`[step] audio_embeds len=${audioEmbedsAll.length} dtype=${audioEmbedsAll.constructor.name}`);
   const audioEmbedsDims = encOut.audio_embeds.dims; // [1, 390, 2048]
   const realChunks = Math.floor((T_mel + 99) / 100);
   const lastChunkMel = T_mel - (realChunks - 1) * 100;
   // 4. prefill
   setStatus("prefill ...");
   const t0 = performance.now();
+  log(`[step] running prefill (L=${L}) ...`);
+  let prefillOut;
+  try {
+    prefillOut = await state.prefill.run({
+      inputs_embeds: new ort.Tensor("float16", inputsEmbedsF16, [1, L, HIDDEN]),
+      attention_mask: new ort.Tensor("int64", attnMask, [1, L]),
+      position_ids: new ort.Tensor("int64", posIds, [1, L]),
+    });
+  } catch (e) { log(`[step] prefill.run failed: ${e.message || e}`); throw e; }
   log(`prefill: ${(performance.now() - t0).toFixed(0)} ms (L=${L})`);
   // 5. greedy decode