Reza2kn committed on
Commit
b8c2d24
·
verified ·
1 Parent(s): 7be4ffd

Use INT4 encoder (MatMulNBits — WebGPU-supported); ORT 1.23 keeps the WebGPU fp16 fix for decoders

Browse files
Files changed (1) hide show
  1. mega-asr.js +9 -20
mega-asr.js CHANGED
@@ -232,26 +232,15 @@ async function loadAll() {
232
  setProgress(30);
233
 
234
  // 3. ONNX sessions
235
- // Audio encoder: try static INT8 (QLinearConv/QLinearMatMul, browser-OK),
236
- // then fall back to INT4 (MatMulNBits) if static INT8 also has issues.
237
- // The OLD audio_encoder_int8.onnx uses ConvInteger (dynamic quant) which
238
- // onnxruntime-web does NOT support — skipping that.
239
- setLoaderStatus("audio encoder (INT8 static → INT4 fallback) ...");
240
- try {
241
- state.encoder = await createSessionSimple(
242
- `${HF_ROOT}/onnx/audio_encoder_int8_static.onnx`,
243
- "audio_encoder INT8 (static)",
244
- p => setProgress(30 + p * 10),
245
- );
246
- } catch (e) {
247
- log(`static INT8 encoder unsupported (${e.message}); falling back to INT4`);
248
- state.encoder = await createSession(
249
- `${HF_ROOT}/onnx/audio_encoder_int4.onnx`,
250
- `${HF_ROOT}/onnx/audio_encoder_int4.onnx.data`,
251
- "audio_encoder INT4",
252
- p => setProgress(30 + p * 10),
253
- );
254
- }
255
  setProgress(40);
256
 
257
  setLoaderStatus("decoder prefill (~970 MB)...");
 
232
  setProgress(30);
233
 
234
  // 3. ONNX sessions
235
+ // Audio encoder: INT4 (MatMulNBits) — well-supported on WebGPU and WASM.
236
+ // Static INT8 (QLinearConv/QLinearMatMul) crashes onnxruntime-web on WebGPU.
237
+ setLoaderStatus("audio encoder INT4 ...");
238
+ state.encoder = await createSession(
239
+ `${HF_ROOT}/onnx/audio_encoder_int4.onnx`,
240
+ `${HF_ROOT}/onnx/audio_encoder_int4.onnx.data`,
241
+ "audio_encoder INT4",
242
+ p => setProgress(30 + p * 10),
243
+ );
 
 
 
 
 
 
 
 
 
 
 
244
  setProgress(40);
245
 
246
  setLoaderStatus("decoder prefill (~970 MB)...");