Upload index.html
Browse files- index.html +18 -29
index.html
CHANGED
|
@@ -628,8 +628,11 @@ processBtn.addEventListener('click', async () => {
|
|
| 628 |
}
|
| 629 |
const totalPadded = padded.length;
|
| 630 |
|
| 631 |
-
//
|
| 632 |
-
|
|
|
|
|
|
|
|
|
|
| 633 |
const outputs = [];
|
| 634 |
const numChunks = Math.ceil(totalPadded / chunkSamples);
|
| 635 |
let chunkIdx = 0;
|
|
@@ -637,48 +640,34 @@ processBtn.addEventListener('click', async () => {
|
|
| 637 |
progressLabel.textContent = 'Processing…';
|
| 638 |
const t0 = performance.now();
|
| 639 |
|
| 640 |
-
// sr_bin_idx
|
| 641 |
-
|
| 642 |
-
const srBins = meta.sr_bins || [48000]; // e.g. [48000] or [16000,24000,48000]
|
| 643 |
-
let srBinIdx = srBins.indexOf(TARGET_SR);
|
| 644 |
-
if (srBinIdx < 0) srBinIdx = 0; // fallback to first bin
|
| 645 |
-
const srIdx = new Int32Array([srBinIdx]);
|
| 646 |
-
console.log('[VoxUpscaler] sr_bin_idx =', srBinIdx, '(bins:', srBins, ')');
|
| 647 |
-
|
| 648 |
-
// State size: prefer meta.total_state_size, fall back to session input shape
|
| 649 |
-
const stateSize = meta.total_state_size ||
|
| 650 |
-
(session.inputNames.includes('state_in')
|
| 651 |
-
? session.inputs.find(i => i.name === 'state_in')?.dims?.reduce((a,b)=>a*b,1) || 0
|
| 652 |
-
: 0);
|
| 653 |
-
if (!stateSize) {
|
| 654 |
-
console.error('[VoxUpscaler] Could not determine state size from meta:', meta);
|
| 655 |
-
}
|
| 656 |
-
console.log('[VoxUpscaler] state size =', stateSize);
|
| 657 |
|
| 658 |
for (let pos = 0; pos < totalPadded; pos += chunkSamples) {
|
| 659 |
const end = Math.min(pos + chunkSamples, totalPadded);
|
| 660 |
const chunk = padded.slice(pos, end);
|
| 661 |
|
| 662 |
-
// Shape: [1, 1, samples]
|
| 663 |
const audioTensor = new ort.Tensor('float32', chunk, [1, 1, chunk.length]);
|
| 664 |
-
const srTensor
|
| 665 |
-
const stateTensor = new ort.Tensor('float32', state,
|
| 666 |
|
| 667 |
const result = await session.run({
|
| 668 |
-
audio:
|
| 669 |
sr_bin_idx: srTensor,
|
| 670 |
-
state_in:
|
| 671 |
});
|
| 672 |
|
| 673 |
const audioOut = await readTensorData(result.audio_out);
|
| 674 |
const stateOut = await readTensorData(result.state_out);
|
| 675 |
|
| 676 |
-
//
|
| 677 |
if (chunkIdx === 0) {
|
| 678 |
-
const
|
| 679 |
-
const
|
| 680 |
-
|
| 681 |
-
|
|
|
|
|
|
|
| 682 |
}
|
| 683 |
|
| 684 |
outputs.push(new Float32Array(audioOut));
|
|
|
|
| 628 |
}
|
| 629 |
const totalPadded = padded.length;
|
| 630 |
|
| 631 |
+
// State: meta gives only flat size, no shape dims.
|
| 632 |
+
// [1, total_state_size] is the standard ONNX convention for a flat state.
|
| 633 |
+
const stateFlat = meta.total_state_size; // 338502
|
| 634 |
+
const stateShape = [1, stateFlat];
|
| 635 |
+
let state = new Float32Array(stateFlat);
|
| 636 |
const outputs = [];
|
| 637 |
const numChunks = Math.ceil(totalPadded / chunkSamples);
|
| 638 |
let chunkIdx = 0;
|
|
|
|
| 640 |
progressLabel.textContent = 'Processing…';
|
| 641 |
const t0 = performance.now();
|
| 642 |
|
| 643 |
+
// sr_bin_idx 0 = 48 kHz (VoxCPM2 only has one output SR)
|
| 644 |
+
const srIdx = new Int32Array([0]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
|
| 646 |
for (let pos = 0; pos < totalPadded; pos += chunkSamples) {
|
| 647 |
const end = Math.min(pos + chunkSamples, totalPadded);
|
| 648 |
const chunk = padded.slice(pos, end);
|
| 649 |
|
|
|
|
| 650 |
const audioTensor = new ort.Tensor('float32', chunk, [1, 1, chunk.length]);
|
| 651 |
+
const srTensor = new ort.Tensor('int32', srIdx, [1]);
|
| 652 |
+
const stateTensor = new ort.Tensor('float32', state, stateShape);
|
| 653 |
|
| 654 |
const result = await session.run({
|
| 655 |
+
audio: audioTensor,
|
| 656 |
sr_bin_idx: srTensor,
|
| 657 |
+
state_in: stateTensor,
|
| 658 |
});
|
| 659 |
|
| 660 |
const audioOut = await readTensorData(result.audio_out);
|
| 661 |
const stateOut = await readTensorData(result.state_out);
|
| 662 |
|
| 663 |
+
// Amplitude check on first chunk — open DevTools Console to see this
|
| 664 |
if (chunkIdx === 0) {
|
| 665 |
+
const a = new Float32Array(audioOut);
|
| 666 |
+
const maxAmp = a.reduce((m, v) => Math.max(m, Math.abs(v)), 0);
|
| 667 |
+
const outShape = result.audio_out?.dims;
|
| 668 |
+
console.log('[VoxUpscaler] chunk 0 → audio_out shape:', outShape,
|
| 669 |
+
'| max amplitude:', maxAmp.toFixed(6),
|
| 670 |
+
maxAmp < 1e-6 ? '⚠️ SILENT — model outputting zeros' : '✓ signal present');
|
| 671 |
}
|
| 672 |
|
| 673 |
outputs.push(new Float32Array(audioOut));
|