<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Vox Upscaler</title>
<!-- Preconnect to BOTH Google Fonts origins: the CSS comes from
     fonts.googleapis.com, but the font files themselves are served from
     fonts.gstatic.com and are fetched with CORS (hence crossorigin). -->
<link rel="preconnect" href="https://fonts.googleapis.com">
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
<link href="https://fonts.googleapis.com/css2?family=DM+Mono:wght@400;500&family=Instrument+Sans:wght@400;600;700&display=swap" rel="stylesheet">
<style>
/* Minimal reset: border-box sizing everywhere, no default margins/padding. */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

/* Design tokens: dark palette, status colors, and the two font stacks
   loaded in <head>. --red is defined but not referenced below (reserved). */
:root {
  --bg: #0a0a0f;
  --surface: #13131a;
  --border: #1e1e2a;
  --text: #e8e8ed;
  --text-dim: #6b6b7b;
  --accent: #ff6b35;
  --accent-glow: rgba(255, 107, 53, 0.15);
  --green: #34d399;
  --yellow: #fbbf24;
  --red: #f87171;
  --font-body: 'Instrument Sans', sans-serif;
  --font-mono: 'DM Mono', monospace;
}

/* Center the single app card in the viewport. */
body {
  background: var(--bg);
  color: var(--text);
  font-family: var(--font-body);
  min-height: 100vh;
  display: flex;
  align-items: center;
  justify-content: center;
}

.container {
  width: 100%;
  max-width: 520px;
  padding: 2rem;
}

/* App title with a glowing accent dot. */
h1 {
  font-size: 1.1rem;
  font-weight: 700;
  letter-spacing: 0.08em;
  text-transform: uppercase;
  margin-bottom: 2rem;
  display: flex;
  align-items: center;
  gap: 0.6rem;
}

h1 .dot {
  width: 8px; height: 8px;
  border-radius: 50%;
  background: var(--accent);
  box-shadow: 0 0 12px var(--accent);
}

/* File drop target; JS toggles .dragover and .has-file. */
.drop-zone {
  border: 2px dashed var(--border);
  border-radius: 12px;
  padding: 3rem 2rem;
  text-align: center;
  cursor: pointer;
  transition: border-color 0.2s, background 0.2s;
  position: relative;
}

.drop-zone:hover, .drop-zone.dragover {
  border-color: var(--accent);
  background: var(--accent-glow);
}

/* Compact solid-border variant once a file is selected. */
.drop-zone.has-file {
  border-style: solid;
  border-color: var(--border);
  padding: 1.2rem 1.5rem;
  text-align: left;
}

.drop-zone label {
  font-size: 0.85rem;
  color: var(--text-dim);
  display: block;
  cursor: pointer;
}

.drop-zone .filename {
  font-family: var(--font-mono);
  font-size: 0.9rem;
  margin-top: 0.3rem;
  color: var(--text);
}

/* The real file input stays hidden; clicks are forwarded from the zone. */
.drop-zone input { display: none; }

/* Row of status chips (backend, RTFx, model state). */
.status-bar {
  display: flex;
  gap: 1.2rem;
  margin-top: 1.2rem;
  font-family: var(--font-mono);
  font-size: 0.75rem;
  color: var(--text-dim);
}

.status-bar .chip {
  display: flex;
  align-items: center;
  gap: 0.4rem;
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 6px;
  padding: 0.35rem 0.7rem;
}

.chip .indicator {
  width: 6px; height: 6px;
  border-radius: 50%;
  background: var(--text-dim);
}

/* Backend indicator colors: green = WebGPU, yellow = CPU/WASM. */
.chip .indicator.gpu { background: var(--green); box-shadow: 0 0 6px var(--green); }
.chip .indicator.cpu { background: var(--yellow); box-shadow: 0 0 6px var(--yellow); }

/* Primary action button. */
button#process {
  width: 100%;
  margin-top: 1.5rem;
  padding: 0.9rem;
  border: none;
  border-radius: 10px;
  background: var(--accent);
  color: #fff;
  font-family: var(--font-body);
  font-size: 0.9rem;
  font-weight: 600;
  cursor: pointer;
  transition: opacity 0.2s, transform 0.1s;
}

button#process:hover { opacity: 0.9; }
button#process:active { transform: scale(0.98); }
button#process:disabled { opacity: 0.4; cursor: not-allowed; transform: none; }

/* Progress bar, hidden until JS adds .active. */
.progress-wrap {
  margin-top: 1.5rem;
  display: none;
}

.progress-wrap.active { display: block; }

.progress-bar-bg {
  width: 100%;
  height: 4px;
  background: var(--surface);
  border-radius: 2px;
  overflow: hidden;
}

/* Fill element; JS drives width from 0% to 100%. */
.progress-bar {
  height: 100%;
  width: 0%;
  background: var(--accent);
  border-radius: 2px;
  transition: width 0.15s;
}

.progress-info {
  display: flex;
  justify-content: space-between;
  margin-top: 0.6rem;
  font-family: var(--font-mono);
  font-size: 0.75rem;
  color: var(--text-dim);
}

/* A/B comparison: input player above, output player below. */
.ab-section {
  margin-top: 1.5rem;
  display: flex;
  flex-direction: column;
  gap: 1rem;
}

.ab-player {
  background: var(--surface);
  border: 1px solid var(--border);
  border-radius: 10px;
  padding: 1rem;
}

.ab-label {
  font-family: var(--font-mono);
  font-size: 0.8rem;
  font-weight: 500;
  margin-bottom: 0.5rem;
  color: var(--text);
}

/* Dimmed sample-rate suffix inside a player label. */
.ab-label .ab-sr {
  color: var(--text-dim);
  font-weight: 400;
}

.ab-player audio {
  width: 100%;
  border-radius: 6px;
}

.ab-player a {
  display: inline-block;
  margin-top: 0.6rem;
  font-family: var(--font-mono);
  font-size: 0.8rem;
  color: var(--accent);
  text-decoration: none;
}

.ab-player a:hover { text-decoration: underline; }

/* Transient model-load status line, hidden until JS adds .active. */
.model-loading {
  margin-top: 1rem;
  font-family: var(--font-mono);
  font-size: 0.75rem;
  color: var(--text-dim);
  display: none;
}

.model-loading.active { display: block; }

footer {
  margin-top: 3rem;
  padding-top: 1.2rem;
  border-top: 1px solid var(--border);
  font-family: var(--font-mono);
  font-size: 0.7rem;
  color: var(--text-dim);
  display: flex;
  flex-direction: column;
  gap: 0.3rem;
}

footer a {
  color: var(--text-dim);
  text-decoration: none;
  border-bottom: 1px dotted var(--text-dim);
}

footer a:hover { color: var(--text); border-color: var(--text); }

/* Soft blink used while the model is downloading. */
@keyframes pulse { 0%,100% { opacity: 1; } 50% { opacity: 0.4; } }
.pulsing { animation: pulse 1.5s ease-in-out infinite; }
</style>
| </head> |
| <body> |
<div class="container">
  <h1><span class="dot"></span>Vox Upscaler</h1>

  <!-- File picker: clicking anywhere in the zone opens the hidden input (JS). -->
  <div class="drop-zone" id="dropZone">
    <label>Drop an audio file or click to browse</label>
    <div class="filename" id="fileName" style="display:none"></div>
    <input type="file" id="fileInput" accept="audio/*">
  </div>

  <!-- Backend / throughput / model chips; text is filled in by the script. -->
  <div class="status-bar">
    <div class="chip"><span class="indicator" id="backendDot"></span><span id="backendLabel">detecting…</span></div>
    <div class="chip" id="rtfChip" style="display:none">RTFx: <span id="rtfValue">—</span></div>
    <div class="chip" id="modelChip"><span id="modelStatus">model not loaded</span></div>
  </div>

  <!-- role="status" so model-load updates are announced to screen readers. -->
  <div class="model-loading" id="modelLoading" role="status"></div>

  <button id="process" disabled>Upscale to 48 kHz</button>

  <div class="progress-wrap" id="progressWrap">
    <div class="progress-bar-bg"><div class="progress-bar" id="progressBar"></div></div>
    <!-- aria-live so progress text changes are announced without stealing focus. -->
    <div class="progress-info" aria-live="polite">
      <span id="progressLabel">Processing…</span>
      <span id="progressPct">0%</span>
    </div>
  </div>

  <!-- A/B comparison: original input vs. 48 kHz output. -->
  <div class="ab-section" id="abSection" style="display:none">
    <div class="ab-player">
      <div class="ab-label">Input<span class="ab-sr" id="inputSrLabel"></span></div>
      <audio controls id="inputPlayer"></audio>
    </div>
    <div class="ab-player" id="outputPanel" style="display:none">
      <div class="ab-label">Output<span class="ab-sr"> — 48 kHz</span></div>
      <audio controls id="audioPlayer"></audio>
      <a id="downloadLink" download>Download WAV</a>
    </div>
  </div>

  <footer>
    <span>VAE model: <a href="https://huggingface.co/openbmb/VoxCPM2/blob/main/audiovae.pth" target="_blank" rel="noopener noreferrer">VoxCPM2</a> by <a href="https://huggingface.co/openbmb" target="_blank" rel="noopener noreferrer">OpenBMB</a> · Apache-2.0</span>
    <span>WebGPU port by <a href="https://huggingface.co/KevinAHM" target="_blank" rel="noopener noreferrer">KevinAHM</a></span>
  </footer>
</div>
|
|
| <script> |
// --- Model / audio constants --------------------------------------------
const HOP = 640;        // hop size in samples; input is zero-padded to a multiple of this
const TARGET_SR = 48000; // sample rate of the generated output WAV
const INPUT_SR = 16000;  // sample rate fed to the model (input is resampled to this)
const META_URL = 'onnx/meta.json';        // model metadata (provides total_state_size)
const MODEL_URL = 'onnx/vae_stream.onnx'; // streaming VAE ONNX graph

// --- Mutable app state ---------------------------------------------------
let session = null;    // ort.InferenceSession, set once the model is loaded
let meta = null;       // parsed meta.json
let backend = null;    // 'webgpu' | 'cpu' after detection in init()
let fileBuffer = null; // ArrayBuffer of the currently selected file
let fileName = '';     // original file name (used to derive the output name)
| |
// Cached references to the UI elements (ids match the markup above).
const dropZone = document.getElementById('dropZone');
const fileInput = document.getElementById('fileInput');
const fileNameEl = document.getElementById('fileName');
const processBtn = document.getElementById('process');
const progressWrap = document.getElementById('progressWrap');
const progressBar = document.getElementById('progressBar');
const progressLabel = document.getElementById('progressLabel');
const progressPct = document.getElementById('progressPct');
const audioPlayer = document.getElementById('audioPlayer');
const downloadLink = document.getElementById('downloadLink');
const backendDot = document.getElementById('backendDot');
const backendLabel = document.getElementById('backendLabel');
const rtfChip = document.getElementById('rtfChip');
const rtfValue = document.getElementById('rtfValue');
const modelStatus = document.getElementById('modelStatus');
const modelLoading = document.getElementById('modelLoading');
| |
| |
// File selection wiring: click-to-browse plus drag & drop on the zone.
dropZone.addEventListener('click', () => fileInput.click());
dropZone.addEventListener('dragover', e => { e.preventDefault(); dropZone.classList.add('dragover'); });
dropZone.addEventListener('dragleave', () => dropZone.classList.remove('dragover'));
// NOTE(review): e.dataTransfer.files[0] is undefined when a non-file (e.g.
// selected text) is dropped; handleFile must tolerate a missing argument.
dropZone.addEventListener('drop', e => { e.preventDefault(); dropZone.classList.remove('dragover'); handleFile(e.dataTransfer.files[0]); });
fileInput.addEventListener('change', () => { if (fileInput.files[0]) handleFile(fileInput.files[0]); });
| |
/**
 * Accept a newly chosen/dropped file: update the drop-zone UI, wire the input
 * A/B player, stash the raw bytes in fileBuffer, and sniff a sample rate for
 * the label.
 *
 * Fixes vs. the original:
 *  - ignores non-file drops (file can be undefined → file.name used to throw);
 *  - revokes the previous input object URL before creating a new one, so
 *    repeated selections no longer leak blobs.
 */
function handleFile(file) {
  if (!file) return; // e.g. text dragged onto the drop zone

  fileName = file.name;
  fileNameEl.textContent = file.name;
  fileNameEl.style.display = 'block';
  dropZone.classList.add('has-file');
  dropZone.querySelector('label').textContent = 'Selected file';

  // Show the input player for A/B comparison; hide any stale output.
  const inputPlayer = document.getElementById('inputPlayer');
  if (inputPlayer.src && inputPlayer.src.startsWith('blob:')) {
    URL.revokeObjectURL(inputPlayer.src); // free the previous selection's blob
  }
  inputPlayer.src = URL.createObjectURL(file);
  document.getElementById('abSection').style.display = 'flex';
  document.getElementById('outputPanel').style.display = 'none';

  // Read the bytes now, and sniff a display sample rate from a canonical
  // RIFF/WAV header (byte offset 24 holds the fmt-chunk sample rate when the
  // fmt chunk sits at its usual position — heuristic, display only).
  file.arrayBuffer().then(buf => {
    fileBuffer = buf;
    const view = new DataView(buf);
    let srText = 'Original';

    if (buf.byteLength > 28) {
      const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3));
      if (riff === 'RIFF') {
        const sr = view.getUint32(24, true);
        srText = (sr / 1000) + ' kHz';
      }
    }
    document.getElementById('inputSrLabel').textContent = ' — ' + srText;
    updateBtn();
  });
}
| |
// Enable the process button only when both a file and a session exist.
function updateBtn() {
  const ready = Boolean(fileBuffer) && Boolean(session);
  processBtn.disabled = !ready;
}
| |
// Read a tensor's contents regardless of backend: WebGPU-resident tensors
// expose an async getData(), WASM/CPU tensors hold values in .data directly.
async function readTensorData(tensor) {
  if (typeof tensor.getData === 'function') {
    return await tensor.getData();
  }
  return tensor.data;
}
| |
| |
/**
 * One-time startup: pick an execution backend (WebGPU when available, WASM
 * otherwise), fetch model metadata, then create the ONNX Runtime session.
 * Updates the status chips throughout and enables the process button via
 * updateBtn() on success.
 */
async function init() {
  // --- Backend detection -------------------------------------------------
  if (navigator.gpu) {
    try {
      const adapter = await navigator.gpu.requestAdapter();
      if (adapter) {
        // Monkey-patch requestDevice so the device ORT creates asks for the
        // adapter's maximum buffer/storage limits (the three limits set
        // below) — presumably the defaults are too small for this model's
        // buffers. NOTE(review): the patch is global and never restored.
        const origRequestDevice = GPUAdapter.prototype.requestDevice;
        const adapterLimits = adapter.limits;
        GPUAdapter.prototype.requestDevice = function(desc) {
          desc = desc || {};
          desc.requiredLimits = desc.requiredLimits || {};
          const rl = desc.requiredLimits;
          rl.maxStorageBuffersPerShaderStage = adapterLimits.maxStorageBuffersPerShaderStage;
          rl.maxBufferSize = adapterLimits.maxBufferSize;
          rl.maxStorageBufferBindingSize = adapterLimits.maxStorageBufferBindingSize;
          console.log(`[VoxUpscaler] patched requestDevice:`, JSON.stringify(rl));
          return origRequestDevice.call(this, desc);
        };
        backend = 'webgpu';
        backendDot.className = 'indicator gpu';
        backendLabel.textContent = 'WebGPU';
      }
    } catch(e) {} // any adapter failure silently falls through to CPU
  }
  if (!backend) {
    backend = 'cpu';
    backendDot.className = 'indicator cpu';
    backendLabel.textContent = 'CPU (WASM)';
  }

  // --- Model metadata (supplies total_state_size used at inference) ------
  modelLoading.classList.add('active');
  modelLoading.innerHTML = '<span class="pulsing">Loading model metadata…</span>';
  const resp = await fetch(META_URL);
  meta = await resp.json();

  // --- Session creation ---------------------------------------------------
  modelLoading.innerHTML = '<span class="pulsing">Loading ONNX model (fp32, ~376 MB)…</span>';
  modelStatus.textContent = 'loading…';

  const ep = backend === 'webgpu' ? 'webgpu' : 'wasm';
  const opts = { executionProviders: [ep] };
  if (ep === 'webgpu') {
    // Keep both model outputs on the CPU so they can be read back directly.
    opts.preferredOutputLocation = {
      audio_out: 'cpu',
      state_out: 'cpu',
    };
  } else {
    opts.executionProviders = [{ name: 'wasm', options: { numThreads: navigator.hardwareConcurrency || 4 } }];
  }

  try {
    // Fetch the model bytes explicitly, then hand the buffer to ORT.
    const modelResp = await fetch(MODEL_URL);
    const modelBuf = await modelResp.arrayBuffer();
    session = await ort.InferenceSession.create(modelBuf, opts);
    modelStatus.textContent = 'ready';
    modelLoading.innerHTML = '✓ Model loaded';
    modelLoading.classList.remove('active');
    setTimeout(() => { modelLoading.style.display = 'none'; }, 1500);
  } catch(e) {
    // WebGPU session creation failed → retry once on the WASM backend.
    if (backend === 'webgpu') {
      backend = 'cpu';
      backendDot.className = 'indicator cpu';
      backendLabel.textContent = 'CPU (WASM)';
      modelLoading.innerHTML = '<span class="pulsing">WebGPU failed, falling back to CPU (fp32)…</span>';
      session = await ort.InferenceSession.create(MODEL_URL, {
        executionProviders: [{ name: 'wasm', options: { numThreads: navigator.hardwareConcurrency || 4 } }]
      });
      modelStatus.textContent = 'ready';
      modelLoading.innerHTML = '✓ Model loaded (CPU fallback)';
    } else {
      modelLoading.innerHTML = 'Failed to load model: ' + e.message;
      modelStatus.textContent = 'error';
      return;
    }
  }
  updateBtn();
}
| |
/**
 * Downmix a decoded AudioBuffer to one mono Float32Array by averaging all
 * channels sample-by-sample.
 */
function mixToMono(audioBuffer) {
  const frameCount = audioBuffer.length;
  const channelCount = audioBuffer.numberOfChannels;
  const mono = new Float32Array(frameCount);

  // Accumulate every channel into the mono buffer…
  for (let ch = 0; ch < channelCount; ch++) {
    const samples = audioBuffer.getChannelData(ch);
    for (let i = 0; i < frameCount; i++) {
      mono[i] += samples[i];
    }
  }

  // …then scale by 1/channels to take the average.
  const scale = 1 / channelCount;
  for (let i = 0; i < frameCount; i++) {
    mono[i] *= scale;
  }
  return mono;
}
| |
// Decode four consecutive bytes as an ASCII chunk tag (e.g. 'RIFF', 'fmt ').
function readFourCc(view, offset) {
  let tag = '';
  for (let i = 0; i < 4; i++) {
    tag += String.fromCharCode(view.getUint8(offset + i));
  }
  return tag;
}
| |
/**
 * Minimal RIFF/WAVE parser: decodes integer PCM (8/16/24/32-bit) and 32-bit
 * float WAV data to averaged-mono Float32Array samples.
 *
 * @returns {{mono: Float32Array, sampleRate: number, channels: number,
 *            source: 'wav'}|null} null when the buffer is not a WAV this
 *          parser understands (caller falls back to WebAudio decoding).
 *
 * Fixes vs. the original:
 *  - the declared 'data' chunk size is clamped to the bytes actually present,
 *    so truncated/streamed files with bogus sizes no longer trigger
 *    out-of-range DataView reads;
 *  - zero blockAlign / zero channels are rejected instead of dividing by zero.
 */
function decodeWavToMono(arrayBuffer) {
  if (arrayBuffer.byteLength < 44) return null;
  const view = new DataView(arrayBuffer);
  // Local 4CC reader keeps the parser self-contained.
  const fourCc = (off) => String.fromCharCode(
    view.getUint8(off), view.getUint8(off + 1), view.getUint8(off + 2), view.getUint8(off + 3)
  );
  if (fourCc(0) !== 'RIFF' || fourCc(8) !== 'WAVE') return null;

  let offset = 12;
  let fmt = null;
  let dataOffset = 0;
  let dataSize = 0;

  // Walk the chunk list until the 'data' chunk; chunks are word-aligned.
  while (offset + 8 <= view.byteLength) {
    const id = fourCc(offset);
    const size = view.getUint32(offset + 4, true);
    const chunkStart = offset + 8;
    if (id === 'fmt ') {
      const format = view.getUint16(chunkStart, true);
      fmt = {
        // WAVE_FORMAT_EXTENSIBLE (0xfffe): the real codec is the first two
        // bytes of the SubFormat GUID at fmt-chunk offset 24.
        format: format === 0xfffe && size >= 40 ? view.getUint16(chunkStart + 24, true) : format,
        channels: view.getUint16(chunkStart + 2, true),
        sampleRate: view.getUint32(chunkStart + 4, true),
        blockAlign: view.getUint16(chunkStart + 12, true),
        bitsPerSample: view.getUint16(chunkStart + 14, true),
      };
    } else if (id === 'data') {
      dataOffset = chunkStart;
      // Clamp to what's actually in the buffer: some encoders write a
      // bogus/huge size (e.g. streamed WAVs), which previously caused
      // DataView reads past the end of the file.
      dataSize = Math.min(size, view.byteLength - chunkStart);
      break;
    }
    offset = chunkStart + size + (size % 2);
  }

  if (!fmt || !dataOffset || !dataSize) return null;
  if (fmt.format !== 1 && fmt.format !== 3) return null; // PCM or IEEE float only
  if (!fmt.channels || !fmt.blockAlign) return null;     // avoid divide-by-zero below
  const bytesPerSample = fmt.bitsPerSample / 8;
  if (!Number.isInteger(bytesPerSample) || bytesPerSample < 1) return null;
  const frames = Math.floor(dataSize / fmt.blockAlign);
  const mono = new Float32Array(frames);

  // Read one sample at byte position `pos`, normalized to roughly [-1, 1).
  const readSample = (pos) => {
    if (fmt.format === 3 && fmt.bitsPerSample === 32) return view.getFloat32(pos, true);
    if (fmt.format !== 1) return 0;
    if (fmt.bitsPerSample === 8) return (view.getUint8(pos) - 128) / 128; // 8-bit PCM is unsigned
    if (fmt.bitsPerSample === 16) return view.getInt16(pos, true) / 32768;
    if (fmt.bitsPerSample === 24) {
      // Assemble little-endian 24-bit, then sign-extend into 32 bits.
      let v = view.getUint8(pos) | (view.getUint8(pos + 1) << 8) | (view.getUint8(pos + 2) << 16);
      if (v & 0x800000) v |= 0xff000000;
      return v / 8388608;
    }
    if (fmt.bitsPerSample === 32) return view.getInt32(pos, true) / 2147483648;
    return 0;
  };

  // Average all channels of each frame into one mono sample.
  for (let frame = 0; frame < frames; frame++) {
    const frameOffset = dataOffset + frame * fmt.blockAlign;
    let sum = 0;
    for (let ch = 0; ch < fmt.channels; ch++) {
      sum += readSample(frameOffset + ch * bytesPerSample);
    }
    mono[frame] = sum / fmt.channels;
  }

  return {
    mono,
    sampleRate: fmt.sampleRate,
    channels: fmt.channels,
    source: 'wav',
  };
}
| |
// Normalized sinc: sin(pi*x)/(pi*x), with the removable singularity at zero
// handled explicitly (returns 1 for |x| below 1e-8).
function sinc(x) {
  if (Math.abs(x) < 1e-8) return 1;
  const t = Math.PI * x;
  return Math.sin(t) / t;
}
| |
/**
 * Windowed-sinc resampler (12-tap radius, sinc window, 0.95 anti-alias
 * cutoff margin). Returns a new Float32Array at outSr; when the rates match
 * a plain copy of the input is returned.
 */
function resampleSinc(input, inSr, outSr) {
  if (inSr === outSr) return new Float32Array(input);

  const outLen = Math.round(input.length * outSr / inSr);
  const result = new Float32Array(outLen);
  const step = inSr / outSr;                       // input samples per output sample
  const cutoff = Math.min(1, outSr / inSr) * 0.95; // stay below Nyquist when downsampling
  const radius = 12;
  const support = radius / cutoff;

  for (let out = 0; out < outLen; out++) {
    const center = out * step;
    const first = Math.max(0, Math.ceil(center - support));
    const last = Math.min(input.length - 1, Math.floor(center + support));

    let acc = 0;
    let norm = 0;
    for (let tap = first; tap <= last; tap++) {
      const x = (center - tap) * cutoff;
      const w = sinc(x) * sinc(x / radius);
      acc += input[tap] * w;
      norm += w;
    }
    // Normalize by the window sum so edge regions keep unity gain.
    result[out] = norm ? acc / norm : 0;
  }
  return result;
}
| |
| |
/**
 * Decode an ArrayBuffer of audio into mono 16 kHz Float32 samples.
 * Tries the built-in WAV parser first; anything else (mp3, ogg, …) goes
 * through WebAudio's decodeAudioData. Either way the mono signal is then
 * resampled to INPUT_SR.
 */
async function decodeToMono16k(arrayBuffer) {
  let decodedAudio = decodeWavToMono(arrayBuffer);

  if (!decodedAudio) {
    // decodeAudioData detaches the buffer it is given, so pass a copy.
    const AudioCtx = window.AudioContext || window.webkitAudioContext;
    const audioCtx = new AudioCtx();
    const decoded = await audioCtx.decodeAudioData(arrayBuffer.slice(0));
    await audioCtx.close();
    decodedAudio = {
      mono: mixToMono(decoded),
      sampleRate: decoded.sampleRate,
      channels: decoded.numberOfChannels,
      source: 'webaudio',
    };
  }

  return resampleSinc(decodedAudio.mono, decodedAudio.sampleRate, INPUT_SR);
}
| |
| |
// Main pipeline: decode the file → pad to a whole number of hops → run the
// streaming model chunk-by-chunk, carrying its flat recurrent state between
// chunks → concatenate, trim the padding, and expose the 48 kHz result as a
// playable/downloadable WAV.
processBtn.addEventListener('click', async () => {
  if (!fileBuffer || !session) return;
  processBtn.disabled = true;
  document.getElementById('outputPanel').style.display = 'none';
  progressWrap.classList.add('active');
  progressBar.style.width = '0%';
  progressPct.textContent = '0%';
  progressLabel.textContent = 'Decoding input…';

  const audio16k = await decodeToMono16k(fileBuffer);
  const totalSamples = audio16k.length;
  const audioDuration = totalSamples / INPUT_SR;

  // Chunk length in input samples, rounded down to a whole number of hops.
  // Larger chunks on WebGPU; smaller on CPU so the progress UI stays live.
  const chunkMs = backend === 'webgpu' ? 5000 : 1000;
  const chunkHops = Math.max(1, Math.floor(chunkMs / 1000 * INPUT_SR / HOP));
  const chunkSamples = chunkHops * HOP;

  // Zero-pad the tail so the total length is a multiple of HOP.
  const pad = (HOP - totalSamples % HOP) % HOP;
  let padded;
  if (pad > 0) {
    padded = new Float32Array(totalSamples + pad);
    padded.set(audio16k); // trailing samples remain zero
  } else {
    padded = audio16k;
  }
  const totalPadded = padded.length;

  // Flat recurrent state, zero-initialized; its size comes from meta.json.
  const stateFlat = meta.total_state_size;
  const stateShape = [1, stateFlat];
  let state = new Float32Array(stateFlat);
  const outputs = [];
  const numChunks = Math.ceil(totalPadded / chunkSamples);
  let chunkIdx = 0;

  progressLabel.textContent = 'Processing…';
  const t0 = performance.now();

  // Sample-rate bin index fed to the model; 0 here. NOTE(review): semantics
  // are defined by the exported graph — presumably selects the 16 kHz bin.
  const srIdx = new Int32Array([0]);

  for (let pos = 0; pos < totalPadded; pos += chunkSamples) {
    const end = Math.min(pos + chunkSamples, totalPadded);
    const chunk = padded.slice(pos, end);

    // Model inputs: audio is [batch=1, channel=1, samples].
    const audioTensor = new ort.Tensor('float32', chunk, [1, 1, chunk.length]);
    const srTensor = new ort.Tensor('int32', srIdx, [1]);
    const stateTensor = new ort.Tensor('float32', state, stateShape);

    const result = await session.run({
      audio: audioTensor,
      sr_bin_idx: srTensor,
      state_in: stateTensor,
    });

    const audioOut = await readTensorData(result.audio_out);
    const stateOut = await readTensorData(result.state_out);

    // One-time sanity log: warn if the first chunk came back all-zero.
    if (chunkIdx === 0) {
      const a = new Float32Array(audioOut);
      const maxAmp = a.reduce((m, v) => Math.max(m, Math.abs(v)), 0);
      const outShape = result.audio_out?.dims;
      console.log('[VoxUpscaler] chunk 0 → audio_out shape:', outShape,
        '| max amplitude:', maxAmp.toFixed(6),
        maxAmp < 1e-6 ? '⚠️ SILENT — model outputting zeros' : '✓ signal present');
    }

    outputs.push(new Float32Array(audioOut));
    state = new Float32Array(stateOut); // carry state into the next chunk

    chunkIdx++;
    const pct = Math.round(chunkIdx / numChunks * 100);
    progressBar.style.width = pct + '%';
    progressPct.textContent = pct + '%';

    // Live RTFx estimate: audio seconds processed per wall-clock second.
    const elapsed = (performance.now() - t0) / 1000;
    const processedDur = end / INPUT_SR;
    const rtf = processedDur / elapsed;
    rtfChip.style.display = 'flex';
    rtfValue.textContent = rtf.toFixed(3) + 'x';
  }

  const totalElapsed = (performance.now() - t0) / 1000;
  const finalRtf = audioDuration / totalElapsed;
  rtfValue.textContent = finalRtf.toFixed(3) + 'x';
  progressLabel.textContent = `Done in ${totalElapsed.toFixed(1)}s`;
  progressPct.textContent = '100%';
  progressBar.style.width = '100%';

  // Concatenate per-chunk outputs into one buffer.
  const totalOut = outputs.reduce((s, a) => s + a.length, 0);
  const fullOutput = new Float32Array(totalOut);
  let off = 0;
  for (const o of outputs) { fullOutput.set(o, off); off += o.length; }

  // Trim away the output samples produced by the zero padding.
  const expectedLen = Math.round(audioDuration * TARGET_SR);
  const trimmed = fullOutput.slice(0, expectedLen);

  // Package as a 16-bit WAV and expose via the player + download link.
  const wav = encodeWav(trimmed, TARGET_SR);
  const blob = new Blob([wav], { type: 'audio/wav' });
  const url = URL.createObjectURL(blob);

  audioPlayer.src = url;
  const outName = fileName.replace(/\.[^.]+$/, '') + '_48k.wav';
  downloadLink.href = url;
  downloadLink.download = outName;
  downloadLink.textContent = 'Download ' + outName;
  document.getElementById('outputPanel').style.display = 'block';
  processBtn.disabled = false;
});
| |
/**
 * Serialize mono Float32 samples into a complete 16-bit PCM WAV file.
 * @param {Float32Array} samples - mono samples, clamped here to [-1, 1]
 * @param {number} sr - sample rate written into the header
 * @returns {ArrayBuffer} RIFF/WAVE file bytes (44-byte header + data)
 */
function encodeWav(samples, sr) {
  const count = samples.length;
  const bytes = new ArrayBuffer(44 + count * 2);
  const view = new DataView(bytes);
  const writeTag = (pos, text) => {
    for (let i = 0; i < text.length; i++) view.setUint8(pos + i, text.charCodeAt(i));
  };

  // RIFF container header.
  writeTag(0, 'RIFF');
  view.setUint32(4, 36 + count * 2, true); // remaining file size
  writeTag(8, 'WAVE');

  // fmt chunk: linear PCM, mono, 16-bit.
  writeTag(12, 'fmt ');
  view.setUint32(16, 16, true);     // fmt chunk size
  view.setUint16(20, 1, true);      // audio format: PCM
  view.setUint16(22, 1, true);      // channel count: mono
  view.setUint32(24, sr, true);     // sample rate
  view.setUint32(28, sr * 2, true); // byte rate = sr * block align
  view.setUint16(32, 2, true);      // block align (bytes per frame)
  view.setUint16(34, 16, true);     // bits per sample

  // data chunk: clamp each sample and scale to signed 16-bit.
  writeTag(36, 'data');
  view.setUint32(40, count * 2, true);
  for (let i = 0; i < count; i++) {
    const s = Math.max(-1, Math.min(1, samples[i]));
    view.setInt16(44 + i * 2, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
  }
  return bytes;
}
| |
| |
// Load onnxruntime-web (WebGPU build) from the CDN, then boot the app.
const ortScript = document.createElement('script');
ortScript.src = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.22.0/dist/ort.webgpu.min.js';
ortScript.crossOrigin = 'anonymous';
ortScript.onload = () => {
  // Point the WASM fallback at the same CDN build and use available cores.
  ort.env.wasm.numThreads = navigator.hardwareConcurrency || 4;
  ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.22.0/dist/';
  init();
};
document.head.appendChild(ortScript);
| </script> |
| </body> |
| </html> |
|
|