Reza2kn committed on
Commit
7be4ffd
·
verified ·
1 Parent(s): 50bb779

Per-step diagnostics: pinpoint which ORT call crashes

Browse files
Files changed (1) hide show
  1. mega-asr.js +21 -9
mega-asr.js CHANGED
@@ -320,10 +320,18 @@ async function transcribe({ mel, dims, T_mel }) {
320
  if (!state.loaded) throw new Error("models not loaded");
321
  // 1. encode
322
  setStatus("audio encoder ...");
323
- const melTensor = new ort.Tensor("float32", mel, dims);
324
- const encOut = await state.encoder.run({ mel: melTensor });
325
- // For WebGPU outputs we must await getData() to bring values back to CPU.
326
- const audioEmbedsAll = await encOut.audio_embeds.getData(true); // Float32Array (1*390*2048,)
 
 
 
 
 
 
 
 
327
  const audioEmbedsDims = encOut.audio_embeds.dims; // [1, 390, 2048]
328
  const realChunks = Math.floor((T_mel + 99) / 100);
329
  const lastChunkMel = T_mel - (realChunks - 1) * 100;
@@ -367,11 +375,15 @@ async function transcribe({ mel, dims, T_mel }) {
367
  // 4. prefill
368
  setStatus("prefill ...");
369
  const t0 = performance.now();
370
- const prefillOut = await state.prefill.run({
371
- inputs_embeds: new ort.Tensor("float16", inputsEmbedsF16, [1, L, HIDDEN]),
372
- attention_mask: new ort.Tensor("int64", attnMask, [1, L]),
373
- position_ids: new ort.Tensor("int64", posIds, [1, L]),
374
- });
 
 
 
 
375
  log(`prefill: ${(performance.now() - t0).toFixed(0)} ms (L=${L})`);
376
 
377
  // 5. greedy decode
 
320
  if (!state.loaded) throw new Error("models not loaded");
321
  // 1. encode
322
  setStatus("audio encoder ...");
323
+ let melTensor;
324
+ try { melTensor = new ort.Tensor("float32", mel, dims); }
325
+ catch (e) { log(`[step] Tensor ctor failed: ${e.message || e}`); throw e; }
326
+ log(`[step] running encoder ...`);
327
+ let encOut;
328
+ try { encOut = await state.encoder.run({ mel: melTensor }); }
329
+ catch (e) { log(`[step] encoder.run failed: ${e.message || e}`); throw e; }
330
+ log(`[step] encoder ok; reading audio_embeds ...`);
331
+ let audioEmbedsAll;
332
+ try { audioEmbedsAll = await encOut.audio_embeds.getData(true); }
333
+ catch (e) { log(`[step] getData failed: ${e.message || e}`); throw e; }
334
+ log(`[step] audio_embeds len=${audioEmbedsAll.length} dtype=${audioEmbedsAll.constructor.name}`);
335
  const audioEmbedsDims = encOut.audio_embeds.dims; // [1, 390, 2048]
336
  const realChunks = Math.floor((T_mel + 99) / 100);
337
  const lastChunkMel = T_mel - (realChunks - 1) * 100;
 
375
  // 4. prefill
376
  setStatus("prefill ...");
377
  const t0 = performance.now();
378
+ log(`[step] running prefill (L=${L}) ...`);
379
+ let prefillOut;
380
+ try {
381
+ prefillOut = await state.prefill.run({
382
+ inputs_embeds: new ort.Tensor("float16", inputsEmbedsF16, [1, L, HIDDEN]),
383
+ attention_mask: new ort.Tensor("int64", attnMask, [1, L]),
384
+ position_ids: new ort.Tensor("int64", posIds, [1, L]),
385
+ });
386
+ } catch (e) { log(`[step] prefill.run failed: ${e.message || e}`); throw e; }
387
  log(`prefill: ${(performance.now() - t0).toFixed(0)} ms (L=${L})`);
388
 
389
  // 5. greedy decode