Spaces:
Running
Running
Use INT4 encoder (MatMulNBits — WebGPU-supported); ORT 1.23 keeps webgpu fp16 fix for decoders
Browse files - mega-asr.js +9 -20
mega-asr.js
CHANGED
|
@@ -232,26 +232,15 @@ async function loadAll() {
|
|
| 232 |
setProgress(30);
|
| 233 |
|
| 234 |
// 3. ONNX sessions
|
| 235 |
-
// Audio encoder:
|
| 236 |
-
//
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
p => setProgress(30 + p * 10),
|
| 245 |
-
);
|
| 246 |
-
} catch (e) {
|
| 247 |
-
log(`static INT8 encoder unsupported (${e.message}); falling back to INT4`);
|
| 248 |
-
state.encoder = await createSession(
|
| 249 |
-
`${HF_ROOT}/onnx/audio_encoder_int4.onnx`,
|
| 250 |
-
`${HF_ROOT}/onnx/audio_encoder_int4.onnx.data`,
|
| 251 |
-
"audio_encoder INT4",
|
| 252 |
-
p => setProgress(30 + p * 10),
|
| 253 |
-
);
|
| 254 |
-
}
|
| 255 |
setProgress(40);
|
| 256 |
|
| 257 |
setLoaderStatus("decoder prefill (~970 MB)...");
|
|
|
|
| 232 |
setProgress(30);
|
| 233 |
|
| 234 |
// 3. ONNX sessions
|
| 235 |
+
// Audio encoder: INT4 (MatMulNBits) — well-supported on WebGPU and WASM.
|
| 236 |
+
// Static INT8 (QLinearConv/QLinearMatMul) crashes onnxruntime-web on WebGPU.
|
| 237 |
+
setLoaderStatus("audio encoder INT4 ...");
|
| 238 |
+
state.encoder = await createSession(
|
| 239 |
+
`${HF_ROOT}/onnx/audio_encoder_int4.onnx`,
|
| 240 |
+
`${HF_ROOT}/onnx/audio_encoder_int4.onnx.data`,
|
| 241 |
+
"audio_encoder INT4",
|
| 242 |
+
p => setProgress(30 + p * 10),
|
| 243 |
+
);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
setProgress(40);
|
| 245 |
|
| 246 |
setLoaderStatus("decoder prefill (~970 MB)...");
|