| import * as ort from 'onnxruntime-web'; |
// Preset demo texts injected by the hosting page (optional global).
const presetTexts = window.presetTexts || {};


// Inline SVG markup for the custom audio-player transport controls.
const PLAY_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M8 5v14l11-7-11-7z"></path></svg>`;
const PAUSE_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M8 6h3v12H8V6zm5 0h3v12h-3V6z"></path></svg>`;
const STOP_ICON_SVG = `<svg width="24" height="24" viewBox="0 0 24 24" fill="currentColor" aria-hidden="true" focusable="false"><path d="M7 7h10v10H7V7z"></path></svg>`;
|
|
| |
// Ambient "lightning flicker" effect: toggles a CSS class on <body> in short
// random bursts, forever. No-ops outside the browser.
(function initLightningParallax() {
    if (typeof document === 'undefined') {
        return;
    }

    // Flash the class on/off `remaining` times (1 or 2), then call onComplete.
    const runBlink = (className, onComplete) => {
        let remaining = 1 + Math.round(Math.random()); // 1 or 2 flashes
        const blink = () => {
            if (remaining-- <= 0) {
                if (typeof onComplete === 'function') {
                    onComplete();
                }
                return;
            }
            const wait = 20 + Math.random() * 80; // flash duration in ms
            document.body.classList.add(className);
            setTimeout(() => {
                document.body.classList.remove(className);
                // Pause the same duration before the next flash in this burst.
                setTimeout(blink, wait);
            }, wait);
        };
        blink();
    };

    // Re-schedule a burst at a random delay of up to 10 seconds, indefinitely.
    const schedule = () => {
        setTimeout(() => runBlink('lightning-flicker', schedule), Math.random() * 10000);
    };
    schedule();
})();
|
|
// Escape the five HTML-special characters so untrusted text can be embedded
// in innerHTML safely. BUG FIX: the previous version returned each character
// unchanged (the entity strings had been lost), so nothing was escaped.
function escapeHtml(value) {
  return value.replace(/[&<>"']/g, (match) => {
    switch (match) {
      case '&': return '&amp;';
      case '<': return '&lt;';
      case '>': return '&gt;';
      case '"': return '&quot;';
      case "'": return '&#39;';
      default: return match; // unreachable given the character class above
    }
  });
}
|
|
// Format a stat value as HTML, appending `suffix` as a styled label span.
// Slash-separated values ("a/b") become separate segments joined by " / ";
// with options.firstLabel the first segment gains a "First" prefix label.
// Placeholder values ('--', '-', 'error', empty) are escaped and returned as-is.
function formatStatValueWithSuffix(value, suffix, options = {}) {
  const { firstLabel = false } = options;
  if (value === undefined || value === null) {
    return '';
  }
  if (!suffix) {
    return escapeHtml(`${value}`);
  }
  const raw = `${value}`.trim();
  const isPlaceholder = !raw || raw === '--' || raw === '-' || raw.toLowerCase() === 'error';
  if (isPlaceholder) {
    return escapeHtml(raw);
  }
  // Render one value+suffix segment; the first segment may carry a prefix label.
  const renderSegment = (segment, isFirst = false) => {
    const trimmed = segment.trim();
    if (!trimmed) {
      return '';
    }
    const withPrefix = isFirst && firstLabel;
    const pieces = [
      withPrefix ? '<span class="stat-label stat-suffix stat-prefix">First</span>' : '',
      `<span class="stat-value-number">${escapeHtml(trimmed)}</span>`,
      `<span class="stat-label stat-suffix">${escapeHtml(suffix)}</span>`,
    ];
    const segmentClass = withPrefix ? 'stat-value-segment has-prefix' : 'stat-value-segment';
    return `<span class="${segmentClass}">${pieces.join('')}</span>`;
  };
  if (!raw.includes('/')) {
    return renderSegment(raw);
  }
  return raw.split('/').map((part, index) => renderSegment(part, index === 0)).join(' / ');
}
|
|
| |
| |
| |
// Converts preprocessed strings into the integer id matrices the ONNX text
// encoder consumes, using a unicode-value -> token-index table ("indexer").
export class UnicodeProcessor {
    // indexer: lookup from unicode value to model token index; entries that
    // are missing, null, or -1 mark unsupported characters.
    constructor(indexer) {
        this.indexer = indexer;
    }

    // Encode a batch of texts. Returns:
    //   textIds  — [batch][maxLen] token ids, 0-padded to the longest text
    //   textMask — [batch][1][maxLen] 1.0/0.0 validity mask
    //   unsupportedChars — unique characters that had no indexer entry
    call(textList, lang = null) {
        const processedTexts = textList.map(t => preprocessText(t, lang));
        // NOTE(review): lengths use UTF-16 .length, while unicodeVals below is
        // built per code point and processedTexts[i][j] indexes UTF-16 units.
        // These disagree if astral characters survive preprocessing (emoji are
        // stripped there) — confirm inputs are BMP-only at this point.
        const textIdsLengths = processedTexts.map(t => t.length);
        const maxLen = Math.max(...textIdsLengths);

        const textIds = [];
        const unsupportedChars = new Set();

        for (let i = 0; i < processedTexts.length; i++) {
            const row = new Array(maxLen).fill(0);
            const unicodeVals = textToUnicodeValues(processedTexts[i]);
            for (let j = 0; j < unicodeVals.length; j++) {
                const indexValue = this.indexer[unicodeVals[j]];

                if (indexValue === undefined || indexValue === null || indexValue === -1) {
                    // Record the offending character and fall back to id 0.
                    unsupportedChars.add(processedTexts[i][j]);
                    row[j] = 0;
                } else {
                    row[j] = indexValue;
                }
            }
            textIds.push(row);
        }

        const textMask = getTextMask(textIdsLengths);
        return { textIds, textMask, unsupportedChars: Array.from(unsupportedChars) };
    }
}
|
|
// Language codes accepted by preprocessText's <lang>…</lang> tagging.
const AVAILABLE_LANGS = ["en", "ko", "es", "pt", "fr"];
|
|
| |
| |
| |
| |
// Heuristically detect the language of `text` among en/ko/es/pt/fr.
// Scores signature characters, exclusive function words, character n-grams,
// and article patterns; returns the top-scoring code, or null when the text
// is too short or no language reaches the confidence threshold.
export function detectLanguage(text) {
  if (!text || text.trim().length < 3) {
    return null;
  }

  // Score only the tail of long inputs — the most recently typed text.
  const sample = text.length > 100 ? text.substring(text.length - 100) : text;
  const normalized = sample.normalize('NFC').toLowerCase();

  // Hangul is unambiguous: two syllables/jamo suffice to call it Korean.
  const hangul = normalized.match(/[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F\uA960-\uA97F\uD7B0-\uD7FF]/g) || [];
  if (hangul.length >= 2) {
    return 'ko';
  }

  const scores = { en: 0, es: 0, fr: 0, pt: 0 };
  const bump = (lang, points) => { scores[lang] += points; };

  // Characters that are (nearly) exclusive to one language.
  const signatureChars = [
    [/ñ/, 'es', 15],
    [/[¿¡]/, 'es', 12],
    [/ã/, 'pt', 15],
    [/õ/, 'pt', 15],
    [/œ/, 'fr', 15],
    [/[ùû]/, 'fr', 10],
  ];
  for (const [pattern, lang, points] of signatureChars) {
    if (pattern.test(normalized)) bump(lang, points);
  }

  // Cedilla is shared between French and Portuguese.
  if (/ç/.test(normalized)) {
    bump('fr', 4);
    bump('pt', 4);
  }

  // Accents that lean French.
  const frenchAccents = [
    [/[èêë]/, 5],
    [/[àâ]/, 3],
    [/[îï]/, 4],
    [/ô/, 3],
  ];
  for (const [pattern, points] of frenchAccents) {
    if (pattern.test(normalized)) bump('fr', points);
  }

  // Function words that rarely appear outside their language.
  const exclusiveWords = {
    en: ['the', 'is', 'are', 'was', 'were', 'have', 'has', 'been', 'will', 'would', 'could', 'should', 'this', 'that', 'with', 'from', 'they', 'what', 'which', 'there', 'their', 'about', 'these', 'other', 'into', 'just', 'your', 'some', 'than', 'them', 'then', 'only', 'being', 'through', 'after', 'before'],
    es: ['el', 'los', 'las', 'es', 'está', 'están', 'porque', 'pero', 'muy', 'también', 'más', 'este', 'esta', 'estos', 'estas', 'ese', 'esa', 'yo', 'tú', 'nosotros', 'ellos', 'ellas', 'hola', 'gracias', 'buenos', 'buenas', 'ahora', 'siempre', 'nunca', 'todo', 'nada', 'algo', 'alguien'],
    fr: ['le', 'les', 'est', 'sont', 'dans', 'ce', 'cette', 'ces', 'il', 'elle', 'ils', 'elles', 'je', 'tu', 'nous', 'vous', 'avec', 'sur', 'ne', 'pas', 'plus', 'tout', 'bien', 'fait', 'être', 'avoir', 'donc', 'car', 'ni', 'jamais', 'toujours', 'rien', 'quelque', 'encore', 'aussi', 'très', 'peu', 'ici'],
    pt: ['os', 'as', 'é', 'são', 'está', 'estão', 'não', 'na', 'no', 'da', 'do', 'das', 'dos', 'ao', 'aos', 'ele', 'ela', 'eles', 'elas', 'eu', 'nós', 'você', 'vocês', 'seu', 'sua', 'seus', 'suas', 'muito', 'também', 'já', 'foi', 'só', 'mesmo', 'ter', 'até', 'isso', 'olá', 'obrigado', 'obrigada', 'bom', 'boa', 'agora', 'sempre', 'nunca', 'tudo', 'nada', 'algo', 'alguém']
  };

  const words = normalized.match(/[a-záàâãäåçéèêëíìîïñóòôõöúùûüýÿœæ]+/g) || [];
  for (const word of words) {
    for (const [lang, wordList] of Object.entries(exclusiveWords)) {
      if (wordList.includes(word)) {
        bump(lang, 3);
      }
    }
  }

  // Characteristic letter sequences, 2 points per occurrence.
  const ngramPatterns = {
    en: [/th/g, /ing/g, /tion/g, /ight/g, /ould/g],
    es: [/ción/g, /mente/g, /ado/g, /ido/g],
    fr: [/tion/g, /ment/g, /eau/g, /aux/g, /eux/g, /oir/g, /ais/g, /ait/g, /ont/g],
    pt: [/ção/g, /ões/g, /mente/g, /ado/g, /ido/g, /nh/g, /lh/g]
  };
  for (const [lang, patterns] of Object.entries(ngramPatterns)) {
    for (const pattern of patterns) {
      bump(lang, (normalized.match(pattern) || []).length * 2);
    }
  }

  // Elisions like "l'homme" / "j'ai" are a strong French cue.
  const contractions = normalized.match(/[cdjlmnst]'[aeiouéèêàâîïôûù]/g) || [];
  bump('fr', contractions.length * 5);

  // Definite articles.
  if (/\bthe\b/.test(normalized)) bump('en', 5);
  if (/\b(el|los)\b/.test(normalized)) bump('es', 4);
  if (/\b(le|les)\b/.test(normalized)) bump('fr', 4);
  if (/\b(o|os)\b/.test(normalized)) bump('pt', 3);

  // Pick the highest scorer, requiring a minimum confidence of 4 points.
  let detected = null;
  let best = 0;
  for (const [lang, score] of Object.entries(scores)) {
    if (score > best) {
      best = score;
      detected = lang;
    }
  }
  return best >= 4 ? detected : null;
}
|
|
| |
// Human-readable names for the supported language codes (shown in the
// language auto-detection toast).
const LANGUAGE_NAMES = {
    'en': 'English',
    'ko': 'Korean',
    'es': 'Spanish',
    'pt': 'Portuguese',
    'fr': 'French'
};
|
|
// Normalize raw text for the TTS model: Unicode-normalize, strip emoji,
// substitute unsupported punctuation, tidy spacing, ensure sentence-final
// punctuation, then wrap the result in a language tag (<en>…</en>, or
// <na>…</na> when lang is null). Throws for a lang outside AVAILABLE_LANGS.
export function preprocessText(text, lang = null) {
  // Compatibility decomposition so ligatures/fullwidth forms match the indexer.
  text = text.normalize('NFKD');

  // Remove emoji and pictograph blocks — the model has no tokens for them.
  text = text.replace(/[\u{1F600}-\u{1F64F}\u{1F300}-\u{1F5FF}\u{1F680}-\u{1F6FF}\u{1F700}-\u{1F77F}\u{1F780}-\u{1F7FF}\u{1F800}-\u{1F8FF}\u{1F900}-\u{1F9FF}\u{1FA00}-\u{1FA6F}\u{1FA70}-\u{1FAFF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1F1E6}-\u{1F1FF}]+/gu, '');

  // One-to-one substitutions: dashes, smart quotes, separators, arrows.
  const replacements = {
    "–": "-",
    "‑": "-",
    "—": "-",
    "_": " ",
    "\u201C": '"',
    "\u201D": '"',
    "\u2018": "'",
    "\u2019": "'",
    "´": "'",
    "`": "'",
    "[": " ",
    "]": " ",
    "|": " ",
    "/": " ",
    "#": " ",
    "→": " ",
    "←": " ",
  };
  for (const [k, v] of Object.entries(replacements)) {
    text = text.replaceAll(k, v);
  }

  // Symbols dropped outright.
  text = text.replace(/[♥☆♡©\\]/g, "");

  // Spoken-form expansions.
  const exprReplacements = {
    "@": " at ",
    "e.g.,": "for example,",
    "i.e.,": "that is,",
  };
  for (const [k, v] of Object.entries(exprReplacements)) {
    text = text.replaceAll(k, v);
  }

  // Remove a single stray space before punctuation.
  text = text.replace(/ ,/g, ",");
  text = text.replace(/ \./g, ".");
  text = text.replace(/ !/g, "!");
  text = text.replace(/ \?/g, "?");
  text = text.replace(/ ;/g, ";");
  text = text.replace(/ :/g, ":");
  text = text.replace(/ '/g, "'");

  // Collapse runs of doubled quotes down to a single quote.
  // (The former backtick de-duplication loop was removed as dead code: the
  // replacements table above already rewrites every backtick to an
  // apostrophe, so "``" can never occur at this point.)
  while (text.includes('""')) {
    text = text.replace(/""/g, '"');
  }
  while (text.includes("''")) {
    text = text.replace(/''/g, "'");
  }

  // Collapse all whitespace to single spaces.
  text = text.replace(/\s+/g, " ").trim();

  // Ensure the text ends in punctuation the model expects.
  if (!/[.!?;:,'"')\]}…。」』】〉》›»]$/.test(text)) {
    text += ".";
  }

  // Wrap in the language tag the model was trained with.
  if (lang !== null) {
    if (!AVAILABLE_LANGS.includes(lang)) {
      throw new Error(`Invalid language: ${lang}`);
    }
    text = `<${lang}>` + text + `</${lang}>`;
  } else {
    text = `<na>` + text + `</na>`;
  }

  return text;
}
|
|
// Map each character of `text` to its Unicode value, iterating per code
// point. BUG FIX: charCodeAt(0) returned only the leading UTF-16 surrogate
// for astral characters even though Array.from splits by code point;
// codePointAt(0) yields the full code-point value (identical for BMP text).
export function textToUnicodeValues(text) {
  return Array.from(text).map(char => char.codePointAt(0));
}
|
|
// Build a [batch][1][maxLen] validity mask: 1.0 inside each sequence's
// length, 0.0 for padding. maxLen defaults to the longest sequence.
export function lengthToMask(lengths, maxLen = null) {
  const width = maxLen || Math.max(...lengths);
  return lengths.map((len) => [
    Array.from({ length: width }, (_, pos) => (pos < len ? 1.0 : 0.0)),
  ]);
}
|
|
// Validity mask for a batch of text-id sequences ([batch][1][maxLen]).
export function getTextMask(textIdsLengths) {
    return lengthToMask(textIdsLengths);
}
|
|
// Validity mask over latent frames for each waveform length. One latent
// frame covers base_chunk_size * chunk_compress_factor samples, so the
// frame count is the ceiling of wavLen / latentSize.
export function getLatentMask(wavLengths, cfgs) {
  const latentSize = cfgs.ae.base_chunk_size * cfgs.ttl.chunk_compress_factor;
  const latentLengths = wavLengths.map(
    (len) => Math.floor((len + latentSize - 1) / latentSize)
  );
  return lengthToMask(latentLengths);
}
|
|
// Sample masked standard-normal noise for the latent diffusion input.
// duration is [batch][1][1] seconds. Returns:
//   noisyLatent — [batch][latent_dim * compress][latentLen] Gaussian noise,
//                 zeroed beyond each item's own latent length
//   latentMask  — the [batch][1][latentLen] mask used for that zeroing
// BUG FIX: Box-Muller previously used Math.log(Math.random()); since
// Math.random() can return exactly 0, that produced Infinity. Using
// 1 - Math.random() maps the range to (0, 1] without changing the
// distribution.
export function sampleNoisyLatent(duration, cfgs) {
  const sampleRate = cfgs.ae.sample_rate;
  const baseChunkSize = cfgs.ae.base_chunk_size;
  const chunkCompressFactor = cfgs.ttl.chunk_compress_factor;
  const ldim = cfgs.ttl.latent_dim;

  // Convert per-item durations (seconds) to sample counts.
  const wavLenMax = Math.max(...duration.map(d => d[0][0])) * sampleRate;
  const wavLengths = duration.map(d => Math.floor(d[0][0] * sampleRate));
  const chunkSize = baseChunkSize * chunkCompressFactor;
  const latentLen = Math.floor((wavLenMax + chunkSize - 1) / chunkSize); // ceil division
  const latentDim = ldim * chunkCompressFactor;

  // Fill [batch][latentDim][latentLen] with N(0, 1) samples via Box-Muller.
  const noisyLatent = [];
  for (let b = 0; b < duration.length; b++) {
    const batch = [];
    for (let d = 0; d < latentDim; d++) {
      const row = [];
      for (let t = 0; t < latentLen; t++) {
        const u1 = 1 - Math.random(); // (0, 1] — keeps Math.log finite
        const u2 = Math.random();
        const randNormal = Math.sqrt(-2.0 * Math.log(u1)) * Math.cos(2.0 * Math.PI * u2);
        row.push(randNormal);
      }
      batch.push(row);
    }
    noisyLatent.push(batch);
  }

  // Zero out frames beyond each item's own latent length.
  const latentMask = getLatentMask(wavLengths, cfgs);
  for (let b = 0; b < noisyLatent.length; b++) {
    for (let d = 0; d < noisyLatent[b].length; d++) {
      for (let t = 0; t < noisyLatent[b][d].length; t++) {
        noisyLatent[b][d][t] *= latentMask[b][0][t];
      }
    }
  }

  return { noisyLatent, latentMask };
}
|
|
// Create an onnxruntime-web InferenceSession for the model at onnxPath.
// opts is passed through unchanged (e.g. execution-provider settings).
export async function loadOnnx(onnxPath, opts) {
    return await ort.InferenceSession.create(onnxPath, opts);
}
|
|
// Load all four ONNX model sessions in parallel from basePath.
// onProgress(modelName, loadedCount, total) fires as each model finishes
// (completion order, not array order). Resolves to an object keyed by
// { dpOrt, textEncOrt, vectorEstOrt, vocoderOrt }.
export async function loadOnnxAll(basePath, opts, onProgress) {
    const models = [
        { name: 'Duration Predictor', path: `${basePath}/duration_predictor.onnx`, key: 'dpOrt' },
        { name: 'Text Encoder', path: `${basePath}/text_encoder.onnx`, key: 'textEncOrt' },
        { name: 'Vector Estimator', path: `${basePath}/vector_estimator.onnx`, key: 'vectorEstOrt' },
        { name: 'Vocoder', path: `${basePath}/vocoder.onnx`, key: 'vocoderOrt' }
    ];

    const result = {};
    let loadedCount = 0;

    // Start every load concurrently and report progress as each completes.
    const loadPromises = models.map(async (model) => {
        const session = await loadOnnx(model.path, opts);
        loadedCount++;
        if (onProgress) {
            onProgress(model.name, loadedCount, models.length);
        }
        return { key: model.key, session };
    });

    const loadedModels = await Promise.all(loadPromises);

    loadedModels.forEach(({ key, session }) => {
        result[key] = session;
    });

    try {
        // Best-effort ping used as a download-count signal for the hosting
        // repo (per the warn message below); failures are logged and ignored.
        await fetch('https://huggingface.co/Supertone/supertonic-2/resolve/main/config.json');
    } catch (error) {
        console.warn('Failed to update download count:', error);
    }
    return result;
}
|
|
// Fetch and parse the TTS configuration JSON from basePath.
// ROBUSTNESS: fetch() does not reject on HTTP errors, so a 404 previously
// surfaced as a confusing JSON parse error; fail fast with a clear message.
export async function loadCfgs(basePath) {
  const response = await fetch(`${basePath}/tts.json`);
  if (!response.ok) {
    throw new Error(`Failed to load tts.json: ${response.status} ${response.statusText}`);
  }
  return await response.json();
}
|
|
// Fetch the unicode indexer table and build the text processor from it.
// ROBUSTNESS: fetch() does not reject on HTTP errors, so a 404 previously
// surfaced as a confusing JSON parse error; fail fast with a clear message.
export async function loadProcessors(basePath) {
  const response = await fetch(`${basePath}/unicode_indexer.json`);
  if (!response.ok) {
    throw new Error(`Failed to load unicode_indexer.json: ${response.status} ${response.statusText}`);
  }
  const unicodeIndexerData = await response.json();
  const textProcessor = new UnicodeProcessor(unicodeIndexerData);

  return { textProcessor };
}
|
|
// Parse a WAV ArrayBuffer into { sampleRate, audioData } where audioData is
// mono Float32 in [-1, 1] (multi-channel input is averaged down to mono).
// Supports 16-bit and 24-bit PCM and 32-bit samples.
// BUG FIX: RIFF chunks are word-aligned — a chunk with an odd size is
// followed by one padding byte. The scan previously skipped only
// 8 + chunkSize, so any odd-sized chunk before "data" desynchronized the
// walk and the file was rejected.
function parseWavFile(buffer) {
  const view = new DataView(buffer);

  // "RIFF" ... "WAVE" container signature.
  const riff = String.fromCharCode(view.getUint8(0), view.getUint8(1), view.getUint8(2), view.getUint8(3));
  if (riff !== 'RIFF') {
    throw new Error('Not a valid WAV file');
  }

  const wave = String.fromCharCode(view.getUint8(8), view.getUint8(9), view.getUint8(10), view.getUint8(11));
  if (wave !== 'WAVE') {
    throw new Error('Not a valid WAV file');
  }

  // Walk the chunk list for "fmt " and "data".
  let offset = 12;
  let fmtChunk = null;
  let dataChunk = null;

  while (offset < buffer.byteLength) {
    const chunkId = String.fromCharCode(
      view.getUint8(offset),
      view.getUint8(offset + 1),
      view.getUint8(offset + 2),
      view.getUint8(offset + 3)
    );
    const chunkSize = view.getUint32(offset + 4, true);

    if (chunkId === 'fmt ') {
      fmtChunk = {
        audioFormat: view.getUint16(offset + 8, true),
        numChannels: view.getUint16(offset + 10, true),
        sampleRate: view.getUint32(offset + 12, true),
        bitsPerSample: view.getUint16(offset + 22, true)
      };
    } else if (chunkId === 'data') {
      dataChunk = {
        offset: offset + 8,
        size: chunkSize
      };
      break;
    }

    // Chunks are word-aligned: odd-sized chunks carry one padding byte.
    offset += 8 + chunkSize + (chunkSize % 2);
  }

  if (!fmtChunk || !dataChunk) {
    throw new Error('Invalid WAV file format');
  }

  const bytesPerSample = fmtChunk.bitsPerSample / 8;
  const numSamples = Math.floor(dataChunk.size / (bytesPerSample * fmtChunk.numChannels));
  const audioData = new Float32Array(numSamples);

  if (fmtChunk.bitsPerSample === 16) {
    // 16-bit signed PCM, averaged across channels.
    for (let i = 0; i < numSamples; i++) {
      let sample = 0;
      for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
        const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 2;
        sample += view.getInt16(sampleOffset, true);
      }
      audioData[i] = (sample / fmtChunk.numChannels) / 32768.0;
    }
  } else if (fmtChunk.bitsPerSample === 24) {
    // 24-bit signed PCM: assemble three little-endian bytes, sign-extend.
    for (let i = 0; i < numSamples; i++) {
      let sample = 0;
      for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
        const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 3;

        const byte1 = view.getUint8(sampleOffset);
        const byte2 = view.getUint8(sampleOffset + 1);
        const byte3 = view.getUint8(sampleOffset + 2);
        let value = (byte3 << 16) | (byte2 << 8) | byte1;

        if (value & 0x800000) {
          value = value - 0x1000000;
        }
        sample += value;
      }
      audioData[i] = (sample / fmtChunk.numChannels) / 8388608.0;
    }
  } else if (fmtChunk.bitsPerSample === 32) {
    // NOTE(review): this branch assumes 32-bit IEEE float samples
    // (audioFormat 3); 32-bit integer PCM (audioFormat 1) would be
    // misread here — confirm the expected inputs.
    for (let i = 0; i < numSamples; i++) {
      let sample = 0;
      for (let ch = 0; ch < fmtChunk.numChannels; ch++) {
        const sampleOffset = dataChunk.offset + (i * fmtChunk.numChannels + ch) * 4;
        sample += view.getFloat32(sampleOffset, true);
      }
      audioData[i] = sample / fmtChunk.numChannels;
    }
  } else {
    throw new Error(`Unsupported bit depth: ${fmtChunk.bitsPerSample}. Supported formats: 16-bit, 24-bit, 32-bit`);
  }

  return {
    sampleRate: fmtChunk.sampleRate,
    audioData: audioData
  };
}
|
|
// Flatten an arbitrarily nested JS array into a float32 ort.Tensor of `dims`.
export function arrayToTensor(array, dims) {
    const flat = array.flat(Infinity);
    return new ort.Tensor('float32', Float32Array.from(flat), dims);
}


// Flatten an arbitrarily nested JS array of integers into an int64 ort.Tensor
// of `dims`. Values must be integral Numbers — BigInt(x) throws otherwise.
export function intArrayToTensor(array, dims) {
    const flat = array.flat(Infinity);
    return new ort.Tensor('int64', BigInt64Array.from(flat.map(x => BigInt(x))), dims);
}
|
|
// Serialize mono float samples (clamped to [-1, 1]) into a 16-bit PCM WAV
// file, returned as an ArrayBuffer (44-byte header + 2 bytes per sample).
export function writeWavFile(audioData, sampleRate) {
  const numChannels = 1;
  const bitsPerSample = 16;
  const bytesPerSample = bitsPerSample / 8;
  const dataSize = audioData.length * bytesPerSample;

  const buffer = new ArrayBuffer(44 + dataSize);
  const view = new DataView(buffer);

  let cursor = 0;
  // Write a 4-character ASCII chunk tag at the current cursor.
  const putTag = (text) => {
    for (const ch of text) {
      view.setUint8(cursor++, ch.charCodeAt(0));
    }
  };

  // RIFF container header.
  putTag('RIFF');
  view.setUint32(cursor, 36 + dataSize, true); cursor += 4;
  putTag('WAVE');

  // "fmt " subchunk: uncompressed PCM, mono, 16-bit.
  putTag('fmt ');
  view.setUint32(cursor, 16, true); cursor += 4;                                        // subchunk size
  view.setUint16(cursor, 1, true); cursor += 2;                                         // format: PCM
  view.setUint16(cursor, numChannels, true); cursor += 2;
  view.setUint32(cursor, sampleRate, true); cursor += 4;
  view.setUint32(cursor, sampleRate * numChannels * bytesPerSample, true); cursor += 4; // byte rate
  view.setUint16(cursor, numChannels * bytesPerSample, true); cursor += 2;              // block align
  view.setUint16(cursor, bitsPerSample, true); cursor += 2;

  // "data" subchunk: clamp each sample and quantize to int16.
  putTag('data');
  view.setUint32(cursor, dataSize, true); cursor += 4;
  for (const value of audioData) {
    const clamped = Math.max(-1, Math.min(1, value));
    view.setInt16(cursor, Math.floor(clamped * 32767), true);
    cursor += 2;
  }

  return buffer;
}
|
|
|
|
|
|
| |
// Page-level UI wiring that must wait for the DOM to be parsed.
document.addEventListener('DOMContentLoaded', () => {
    // Smooth-scroll for same-page anchor links, keeping the hash in the URL.
    document.querySelectorAll('a[href^="#"]').forEach(anchor => {
        anchor.addEventListener('click', function (e) {
            e.preventDefault();
            const href = this.getAttribute('href');
            const target = document.querySelector(href);
            if (target) {
                // Update the address bar without the default jump.
                if (history.pushState) {
                    history.pushState(null, null, href);
                }
                target.scrollIntoView({
                    behavior: 'smooth',
                    block: 'start'
                });
            }
        });
    });

    // Reveal-on-scroll observer: fades/slides elements in when visible.
    // NOTE(review): observer.observe(...) is never called in this handler,
    // so no element is registered here — confirm whether elements are
    // observed elsewhere or this is dead code.
    const observerOptions = {
        threshold: 0.1,
        rootMargin: '0px 0px -100px 0px'
    };

    const observer = new IntersectionObserver((entries) => {
        entries.forEach(entry => {
            if (entry.isIntersecting) {
                entry.target.style.opacity = '1';
                entry.target.style.transform = 'translateY(0)';
            }
        });
    }, observerOptions);

});
|
|
| |
| (async function() { |
| |
| const demoTextInput = document.getElementById('demoTextInput'); |
| if (!demoTextInput) return; |
| |
| |
| ort.env.wasm.wasmPaths = 'https://cdn.jsdelivr.net/npm/onnxruntime-web@1.23.0/dist/'; |
| ort.env.wasm.numThreads = 1; |
| |
|
|
| |
    // Per-voice style embedding files (F = female, M = male presets).
    const REF_EMBEDDING_PATHS = {
        'F1': 'assets/voice_styles/F1.json',
        'F2': 'assets/voice_styles/F2.json',
        'F3': 'assets/voice_styles/F3.json',
        'F4': 'assets/voice_styles/F4.json',
        'F5': 'assets/voice_styles/F5.json',
        'M1': 'assets/voice_styles/M1.json',
        'M2': 'assets/voice_styles/M2.json',
        'M3': 'assets/voice_styles/M3.json',
        'M4': 'assets/voice_styles/M4.json',
        'M5': 'assets/voice_styles/M5.json'
    };

    // Human-readable descriptions shown in the voice picker UI.
    const VOICE_DESCRIPTIONS = {
        'F1': 'Sarah - A calm female voice with a slightly low tone; steady and composed.',
        'F2': 'Lily - A bright, cheerful female voice; lively, playful, and youthful with spirited energy.',
        'F3': 'Jessica - A clear, professional announcer-style female voice; articulate and broadcast-ready.',
        'F4': 'Olivia - A crisp, confident female voice; distinct and expressive with strong delivery.',
        'F5': 'Emily - A kind, gentle female voice; soft-spoken, calm, and naturally soothing.',
        'M1': 'Alex - A lively, upbeat male voice with confident energy and a standard, clear tone.',
        'M2': 'James - A deep, robust male voice; calm, composed, and serious with a grounded presence.',
        'M3': 'Robert - A polished, authoritative male voice; confident and trustworthy with strong presentation quality.',
        'M4': 'Sam - A soft, neutral-toned male voice; gentle and approachable with a youthful, friendly quality.',
        'M5': 'Daniel - A warm, soft-spoken male voice; calm and soothing with a natural storytelling quality.'
    };

    // Demo-wide state: loaded model sessions, configuration, text processors,
    // and the currently selected voice preset.
    let models = null;
    let cfgs = null;
    let processors = null;
    let currentVoice = 'M3';
| |
| |
| function detectBrowserLanguage() { |
| |
| const browserLang = navigator.language || navigator.userLanguage || 'en'; |
| |
| |
| const langCode = browserLang.split('-')[0].toLowerCase(); |
| |
| |
| const supportedLangs = ['en', 'es', 'pt', 'fr', 'ko']; |
| |
| |
| return supportedLangs.includes(langCode) ? langCode : 'en'; |
| } |
| |
| let currentLanguage = detectBrowserLanguage(); |
| let refEmbeddingCache = {}; |
| let currentStyleTtlTensor = null; |
| let currentStyleDpTensor = null; |
| let modelsLoading = false; |
| let modelsLoaded = false; |
| let modelsLoadPromise = null; |
|
|
| |
| const demoStatusBox = document.getElementById('demoStatusBox'); |
| const demoStatusText = document.getElementById('demoStatusText'); |
| const wasmWarningBanner = document.getElementById('wasmWarningBanner'); |
| const demoGenerateBtn = document.getElementById('demoGenerateBtn'); |
| const demoTotalSteps = document.getElementById('demoTotalSteps'); |
| const demoSpeed = document.getElementById('demoSpeed'); |
| const demoTotalStepsValue = document.getElementById('demoTotalStepsValue'); |
| const demoSpeedValue = document.getElementById('demoSpeedValue'); |
| const demoResults = document.getElementById('demoResults'); |
| const demoError = document.getElementById('demoError'); |
| const demoCharCount = document.getElementById('demoCharCount'); |
| const demoCharCounter = document.getElementById('demoCharCounter'); |
| const demoCharWarning = document.getElementById('demoCharWarning'); |
|
|
| |
    // Input length limits. Korean gets a smaller per-chunk budget than the
    // other languages (see getMaxChunkLength below).
    const MIN_CHARS = 10;
    const MAX_CHUNK_LENGTH_DEFAULT = 300;
    const MAX_CHUNK_LENGTH_KO = 120;
    // Chunk budget for the currently selected language.
    function getMaxChunkLength() {
        return currentLanguage === 'ko' ? MAX_CHUNK_LENGTH_KO : MAX_CHUNK_LENGTH_DEFAULT;
    }
| |
| |
| let audioContext = null; |
| let scheduledSources = []; |
| let audioChunks = []; |
| let totalDuration = 0; |
| let startTime = 0; |
| let pauseTime = 0; |
| let isPaused = false; |
| let isPlaying = false; |
| let animationFrameId = null; |
| let playPauseBtn = null; |
| let progressBar = null; |
| let currentTimeDisplay = null; |
| let durationDisplay = null; |
| let progressFill = null; |
| let firstChunkGenerationTime = 0; |
| let totalChunks = 0; |
| let nextScheduledTime = 0; |
| let currentGenerationTextLength = 0; |
| let supertonicPlayerRecord = null; |
| let isGenerating = false; |
| |
| |
| let customAudioPlayers = []; |
|
|
    // True when the viewport matches the 768px mobile CSS breakpoint.
    const isMobileViewport = () => window.matchMedia('(max-width: 768px)').matches;
    // True on touch-capable devices.
    const isTouchDevice = () => 'ontouchstart' in window || navigator.maxTouchPoints > 0;
    // On mobile viewports, drop a trailing two-decimal fraction from a
    // formatted stat string to save horizontal space.
    const trimDecimalsForMobile = (formatted) => {
        if (!formatted) return formatted;
        return isMobileViewport() ? formatted.replace(/\.\d{2}$/, '') : formatted;
    };
|
|
| function pauseAllPlayersExcept(currentPlayer) { |
| customAudioPlayers.forEach(player => { |
| if (player !== currentPlayer && player && typeof player.pausePlayback === 'function') { |
| player.pausePlayback(); |
| } |
| }); |
| } |
|
|
|
|
| |
| |
| |
| |
| |
| |
| function chunkText(text, maxLen = getMaxChunkLength()) { |
| |
| const paragraphs = text.trim().split(/\n\s*\n+/).filter(p => p.trim()); |
| |
| const chunks = []; |
| |
| for (let paragraph of paragraphs) { |
| paragraph = paragraph.trim(); |
| if (!paragraph) continue; |
| |
| |
| |
| const sentences = paragraph.split(/(?<!Mr\.|Mrs\.|Ms\.|Dr\.|Prof\.|Sr\.|Jr\.|Ph\.D\.|etc\.|e\.g\.|i\.e\.|vs\.|Inc\.|Ltd\.|Co\.|Corp\.|St\.|Ave\.|Blvd\.)(?<!\b[A-Z]\.)(?<=[.!?])\s+/); |
| |
| let currentChunk = ""; |
| |
| for (let sentence of sentences) { |
| if (currentChunk.length + sentence.length + 1 <= maxLen) { |
| currentChunk += (currentChunk ? " " : "") + sentence; |
| } else { |
| if (currentChunk) { |
| chunks.push(currentChunk.trim()); |
| } |
| currentChunk = sentence; |
| } |
| } |
| |
| if (currentChunk) { |
| chunks.push(currentChunk.trim()); |
| } |
| } |
| |
| return chunks; |
| } |
|
|
| function showDemoStatus(message, type = 'info', progress = null) { |
| demoStatusText.innerHTML = message; |
| demoStatusBox.className = 'demo-status-box'; |
| demoStatusBox.style.removeProperty('--status-progress'); |
| demoStatusBox.style.display = ''; |
| |
| if (type === 'success') { |
| demoStatusBox.classList.add('success'); |
| } else if (type === 'error') { |
| demoStatusBox.classList.add('error'); |
| } |
| |
| |
| if (progress !== null && progress >= 0 && progress <= 100) { |
| const clampedProgress = Math.max(0, Math.min(progress, 100)); |
| demoStatusBox.style.setProperty('--status-progress', `${clampedProgress}%`); |
| demoStatusBox.classList.toggle('complete', clampedProgress >= 100); |
| } else if (type === 'success' || type === 'error') { |
| demoStatusBox.style.removeProperty('--status-progress'); |
| demoStatusBox.classList.remove('complete'); |
| } else { |
| demoStatusBox.style.removeProperty('--status-progress'); |
| demoStatusBox.classList.remove('complete'); |
| } |
| } |
|
|
    // Hide the demo status box entirely.
    function hideDemoStatus() {
        demoStatusBox.style.display = 'none';
    }


    // Show an error message beneath the demo input.
    function showDemoError(message) {
        demoError.textContent = message;
        demoError.classList.add('active');
    }


    // Clear any visible demo error message.
    function hideDemoError() {
        demoError.classList.remove('active');
    }
| |
| |
| const languageToast = document.getElementById('languageToast'); |
| const languageToastMessage = document.getElementById('languageToastMessage'); |
| let languageToastTimeout = null; |
| |
    // Briefly show a toast announcing an auto-detected language switch.
    // NOTE(review): fromLang is resolved into fromName but never displayed —
    // only the target language appears in the message; confirm intent.
    function showLanguageToast(fromLang, toLang) {
        if (!languageToast || !languageToastMessage) return;

        const fromName = LANGUAGE_NAMES[fromLang] || fromLang;
        const toName = LANGUAGE_NAMES[toLang] || toLang;

        languageToastMessage.innerHTML = `Language auto-detected: <strong>${toName}</strong>`;

        // Restart the hide timer if a toast is already visible.
        if (languageToastTimeout) {
            clearTimeout(languageToastTimeout);
        }

        languageToast.classList.add('show');

        // Auto-hide after 3 seconds.
        languageToastTimeout = setTimeout(() => {
            languageToast.classList.remove('show');
        }, 3000);
    }


    // Reveal the WASM warning banner (its content is defined in the markup).
    function showWasmWarning() {
        if (wasmWarningBanner) {
            wasmWarningBanner.style.display = 'flex';
        }
    }
|
|
| |
    // Check `text` for characters the model's indexer can't encode.
    // Returns { valid, unsupportedChars }, where unsupportedChars contains the
    // ORIGINAL input characters (not their preprocessed forms) so errors can
    // be reported in the user's own text.
    function validateCharacters(text) {
        // Before the processors have loaded, treat everything as valid.
        if (!processors || !processors.textProcessor) {
            return { valid: true, unsupportedChars: [] };
        }

        try {
            const uniqueChars = [...new Set(text)];

            // Preprocess each unique character once, and remember which
            // original character(s) produced each processed character.
            const processedToOriginal = new Map();
            const charToProcessed = new Map();

            for (const char of uniqueChars) {
                const processedChar = preprocessText(char);
                charToProcessed.set(char, processedChar);

                for (const pc of processedChar) {
                    if (!processedToOriginal.has(pc)) {
                        processedToOriginal.set(pc, new Set());
                    }
                    processedToOriginal.get(pc).add(char);
                }
            }

            // Re-assemble the whole text from the per-character results and
            // run it through the indexer to find unsupported characters.
            const fullProcessedText = Array.from(text).map(c => charToProcessed.get(c)).join('');

            const { unsupportedChars } = processors.textProcessor.call([fullProcessedText]);

            // Translate unsupported processed characters back to originals.
            const unsupportedOriginalChars = new Set();
            if (unsupportedChars && unsupportedChars.length > 0) {
                for (const unsupportedChar of unsupportedChars) {
                    const originalChars = processedToOriginal.get(unsupportedChar);
                    if (originalChars) {
                        originalChars.forEach(c => unsupportedOriginalChars.add(c));
                    }
                }
            }

            const unsupportedCharsArray = Array.from(unsupportedOriginalChars);
            return {
                valid: unsupportedCharsArray.length === 0,
                unsupportedChars: unsupportedCharsArray
            };
        } catch (error) {
            // Best-effort: a validation failure never blocks generation.
            return { valid: true, unsupportedChars: [] };
        }
    }
|
|
| |
    // Refresh the character counter, auto-scale the input font size to the
    // text length, run unsupported-character validation, and enable/disable
    // the Generate button accordingly.
    function updateCharCounter() {
        const rawText = demoTextInput.textContent || demoTextInput.innerText || '';
        // Drop a single trailing newline left by contenteditable.
        const text = rawText.replace(/\n$/g, '');
        const length = text.length;

        demoCharCount.textContent = length;

        // Font size scales with the input box width; longer text gets a
        // smaller ratio so it stays visible.
        const textareaWidth = demoTextInput.offsetWidth;

        // NOTE(review): maxWidthRef is declared but never used below —
        // confirm whether the ratios were meant to be normalized against it.
        const maxWidthRef = 640;

        // Mobile doubles the ratio to compensate for the narrower box.
        const isMobile = window.innerWidth <= 572;
        const mobileMultiplier = isMobile ? 2 : 1;

        // NOTE(review): the ratio is not monotonic in length (0.04 for
        // <=200 but 0.053125 for <240) — confirm these breakpoints.
        let fontSizeRatio;
        if (length <= 100) {
            fontSizeRatio = 0.055 * mobileMultiplier;
        } else if (length <= 200) {
            fontSizeRatio = 0.04 * mobileMultiplier;
        } else if (length < 240) {
            fontSizeRatio = 0.053125 * mobileMultiplier;
        } else if (length < 400) {
            fontSizeRatio = 0.0425 * mobileMultiplier;
        } else if (length < 700) {
            fontSizeRatio = 0.031875 * mobileMultiplier;
        } else {
            fontSizeRatio = 0.025 * mobileMultiplier;
        }

        const fontSize = textareaWidth * fontSizeRatio;
        demoTextInput.style.fontSize = `${fontSize}px`;

        demoCharCounter.classList.remove('error', 'warning', 'valid');

        // Only validate characters once the models and processors are ready.
        let hasUnsupportedChars = false;
        if (models && processors && length > 0) {
            const validation = validateCharacters(text);
            if (!validation.valid && validation.unsupportedChars.length > 0) {
                hasUnsupportedChars = true;
                // Show at most five offending characters in the message.
                const charList = validation.unsupportedChars.slice(0, 5).map(c => `"${c}"`).join(', ');
                const moreChars = validation.unsupportedChars.length > 5 ? ` and ${validation.unsupportedChars.length - 5} more` : '';
                showDemoError(`Unsupported characters detected: ${charList}${moreChars}. Please remove them before generating speech.`);
            } else {
                hideDemoError();
            }
        }

        // Gate the Generate button on minimum length, character support, and
        // model readiness / generation state.
        if (length < MIN_CHARS) {
            demoCharCounter.classList.add('error');
            demoCharWarning.textContent = '(At least 10 characters)';
            demoGenerateBtn.disabled = true;
        } else if (hasUnsupportedChars) {
            demoCharCounter.classList.add('error');
            demoCharWarning.textContent = '(Unsupported characters)';
            demoGenerateBtn.disabled = true;
        } else {
            demoCharCounter.classList.add('valid');
            demoCharWarning.textContent = '';
            demoGenerateBtn.disabled = !models || isGenerating;
        }
    }
|
|
| |
/**
 * Validates raw demo input text before speech generation is attempted.
 * @param {string} text - The user-entered text.
 * @returns {{valid: boolean, message?: string}} `valid: true` when the text
 *   is acceptable; otherwise `valid: false` with a user-facing `message`.
 */
function validateTextInput(text) {
    // Reject empty or whitespace-only input outright.
    if (!text || text.trim().length === 0) {
        return { valid: false, message: 'Please enter some text.' };
    }
    // Note: the minimum-length check uses the raw (untrimmed) length.
    if (text.length >= MIN_CHARS) {
        return { valid: true };
    }
    return { valid: false, message: `Text must be at least ${MIN_CHARS} characters long. (Currently ${text.length})` };
}
|
|
| |
/**
 * Builds an ort.Tensor from a serialized embedding entry of the form
 * `{ data, dims, type? }`. Nested arrays are flattened so any serialized
 * nesting shape is accepted; `type` defaults to 'float32'.
 * @param {{data: Array, dims: number[], type?: string}} entry - Serialized tensor.
 * @returns {ort.Tensor} The reconstructed tensor.
 */
function tensorFromEmbeddingEntry(entry) {
    const flatData = entry.data.flat(Infinity);
    return new ort.Tensor(
        entry.type || 'float32',
        Float32Array.from(flatData),
        entry.dims
    );
}

/**
 * Loads the style embedding tensors (TTL and duration-predictor styles) for a
 * voice, fetching the JSON file configured in REF_EMBEDDING_PATHS. Results are
 * memoized in refEmbeddingCache, so each voice is fetched at most once.
 *
 * Fix: removed a pointless `try { ... } catch (error) { throw error; }`
 * wrapper that only re-threw, and deduplicated the two identical
 * tensor-construction passes into tensorFromEmbeddingEntry().
 *
 * @param {string} voice - Voice identifier (key of REF_EMBEDDING_PATHS).
 * @returns {Promise<{styleTtl: ort.Tensor, styleDp: ort.Tensor}>}
 * @throws {Error} If no path is configured for the voice or the fetch fails.
 */
async function loadStyleEmbeddings(voice) {
    // Serve from cache when this voice has already been loaded.
    if (refEmbeddingCache[voice]) {
        return refEmbeddingCache[voice];
    }

    const embeddingPath = REF_EMBEDDING_PATHS[voice];
    if (!embeddingPath) {
        throw new Error(`No embedding path configured for voice: ${voice}`);
    }

    const response = await fetch(embeddingPath);
    if (!response.ok) {
        throw new Error(`Failed to fetch embedding: ${response.statusText}`);
    }

    const embeddingData = await response.json();

    const embeddings = {
        styleTtl: tensorFromEmbeddingEntry(embeddingData.style_ttl),
        styleDp: tensorFromEmbeddingEntry(embeddingData.style_dp)
    };

    // Memoize so switching back to this voice is instant.
    refEmbeddingCache[voice] = embeddings;

    return embeddings;
}
| |
| |
/**
 * Switches the active voice: loads (or reuses cached) style embeddings,
 * updates the module-level current-voice state, and refreshes the UI.
 * @param {string} voice - Voice identifier to activate.
 * @throws {Error} Re-throws any embedding-load failure after showing the
 *   error in the demo UI.
 */
async function switchVoice(voice) {
    try {
        const { styleTtl, styleDp } = await loadStyleEmbeddings(voice);

        // Commit the new voice to module state.
        currentStyleTtlTensor = styleTtl;
        currentStyleDpTensor = styleDp;
        currentVoice = voice;

        // Notify the speaker UI, if the page registered a handler.
        if (typeof window.updateActiveSpeaker === 'function') {
            window.updateActiveSpeaker(voice);
        }

        // Re-run validation so button state reflects the new voice.
        updateCharCounter();
    } catch (error) {
        showDemoError(`Failed to load voice ${voice}: ${error.message}`);
        throw error;
    }
}
|
|
| |
/**
 * Probes whether this browser can run the models on WebGPU.
 * iOS and Safari are rejected up front because they lack WebGPU features the
 * models need (e.g. subgroup operations) even when `navigator.gpu` exists.
 *
 * Fix: removed the dead `adapterInfo` local — the (deprecated)
 * requestAdapterInfo() call is kept purely as a best-effort probe whose
 * result was never used.
 *
 * @returns {Promise<{supported: boolean, reason?: string, adapter?: object, device?: object}>}
 *   `supported: true` with the adapter/device on success, otherwise
 *   `supported: false` with a human-readable `reason`.
 */
async function checkWebGPUSupport() {
    try {
        // iPadOS 13+ reports platform "MacIntel" but exposes touch points.
        const isIOS = /iPad|iPhone|iPod/.test(navigator.userAgent) ||
            (navigator.platform === 'MacIntel' && navigator.maxTouchPoints > 1);
        const isSafari = /^((?!chrome|crios|android|edg|firefox).)*safari/i.test(navigator.userAgent);

        if (isIOS) {
            return { supported: false, reason: 'iOS does not support the required WebGPU features' };
        }
        if (isSafari) {
            return { supported: false, reason: 'Safari does not support the required WebGPU features' };
        }

        if (!navigator.gpu) {
            return { supported: false, reason: 'WebGPU not available in this browser' };
        }

        const adapter = await navigator.gpu.requestAdapter();
        if (!adapter) {
            return { supported: false, reason: 'No WebGPU adapter found' };
        }

        // Best-effort probe; requestAdapterInfo() is deprecated/absent in some
        // browsers, so failures are intentionally ignored.
        try {
            await adapter.requestAdapterInfo();
        } catch (infoError) {
            // Non-fatal: adapter info is diagnostic only.
        }

        const device = await adapter.requestDevice();
        if (!device) {
            return { supported: false, reason: 'Failed to create WebGPU device' };
        }

        return { supported: true, adapter, device };
    } catch (error) {
        // Map the known iOS/Safari subgroup failure to a clearer reason.
        const errorMsg = error.message || '';
        if (errorMsg.includes('subgroupMinSize') || errorMsg.includes('subgroup')) {
            return { supported: false, reason: 'iOS/Safari does not support required WebGPU features (subgroup operations)' };
        }
        return { supported: false, reason: error.message };
    }
}
|
|
| |
/**
 * Runs one full synthesis pass (duration predictor -> text encoder ->
 * denoiser loop -> vocoder) on fixed dummy text so subsequent real requests
 * don't pay first-run compilation/allocation costs. All outputs are
 * discarded; failures are logged and swallowed because warmup is optional.
 *
 * Fix: dropped the unused `vocoderResult` binding on the final run() call.
 *
 * Relies on module state: `models`, `processors`, `cfgs`, `currentLanguage`,
 * `currentStyleTtlTensor`, `currentStyleDpTensor` must be initialized.
 */
async function warmupModels() {
    try {
        const dummyText = 'Looking to integrate Supertonic into your product? We offer customized on-device SDK solutions tailored to your business needs. Our lightweight, high-performance TTS technology can be seamlessly integrated into mobile apps, IoT devices, automotive systems, and more. Try it now, and enjoy its speed.';
        const totalStep = 5;
        const durationFactor = 1.0;

        const textList = [dummyText];
        const bsz = 1;

        const styleTtlTensor = currentStyleTtlTensor;
        const styleDpTensor = currentStyleDpTensor;

        // Tokenize the dummy text for the current language.
        const { textIds, textMask } = processors.textProcessor.call(textList, currentLanguage);

        const textIdsShape = [bsz, textIds[0].length];
        const textMaskShape = [bsz, 1, textMask[0][0].length];
        const textMaskTensor = arrayToTensor(textMask, textMaskShape);

        // Duration predictor pass.
        const dpResult = await models.dpOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_dp: styleDpTensor,
            text_mask: textMaskTensor
        });

        const durOnnx = Array.from(dpResult.duration.data);
        for (let i = 0; i < durOnnx.length; i++) {
            durOnnx[i] *= durationFactor;
        }
        // Reshape to [bsz][1][1] as expected by sampleNoisyLatent.
        const durReshaped = [];
        for (let b = 0; b < bsz; b++) {
            durReshaped.push([[durOnnx[b]]]);
        }

        // Text encoder pass.
        const textEncResult = await models.textEncOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_ttl: styleTtlTensor,
            text_mask: textMaskTensor
        });

        const textEmbTensor = textEncResult.text_emb;

        // Initialize the noisy latent and its mask from predicted durations.
        let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs);
        const latentShape = [bsz, noisyLatent[0].length, noisyLatent[0][0].length];
        const latentMaskShape = [bsz, 1, latentMask[0][0].length];
        const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape);

        const totalStepArray = new Array(bsz).fill(totalStep);
        const scalarShape = [bsz];
        const totalStepTensor = arrayToTensor(totalStepArray, scalarShape);

        // Iterative denoising: each step consumes the previous step's output.
        for (let step = 0; step < totalStep; step++) {
            const currentStepArray = new Array(bsz).fill(step);

            const vectorEstResult = await models.vectorEstOrt.run({
                noisy_latent: arrayToTensor(noisyLatent, latentShape),
                text_emb: textEmbTensor,
                style_ttl: styleTtlTensor,
                text_mask: textMaskTensor,
                latent_mask: latentMaskTensor,
                total_step: totalStepTensor,
                current_step: arrayToTensor(currentStepArray, scalarShape)
            });

            const denoisedLatent = Array.from(vectorEstResult.denoised_latent.data);

            // Write the flat denoised output back into the nested latent.
            let idx = 0;
            for (let b = 0; b < noisyLatent.length; b++) {
                for (let d = 0; d < noisyLatent[b].length; d++) {
                    for (let t = 0; t < noisyLatent[b][d].length; t++) {
                        noisyLatent[b][d][t] = denoisedLatent[idx++];
                    }
                }
            }
        }

        // Vocoder pass; output intentionally discarded — this only primes
        // the session.
        await models.vocoderOrt.run({
            latent: arrayToTensor(noisyLatent, latentShape)
        });

    } catch (error) {
        // Warmup is best-effort; never block initialization on it.
        console.warn('Warmup failed (non-critical):', error.message);
    }
}
|
|
| |
/**
 * Lazily initializes the TTS stack: configuration, ONNX sessions (WebGPU when
 * supported, WASM fallback otherwise), text processors, reference embeddings
 * for the current voice, and a warmup pass. Concurrent callers share a single
 * in-flight promise; a completed load short-circuits.
 *
 * Fix: the inner `const modelsLoadPromise` shadowed the module-level
 * `modelsLoadPromise` it was declared inside of — renamed to
 * `onnxSessionsPromise` for clarity.
 *
 * @returns {Promise<void>|undefined} The shared load promise, or undefined
 *   when models are already loaded.
 * @throws {Error} Re-throws (via the returned promise) any load failure after
 *   surfacing it in the demo UI.
 */
async function initializeModels() {
    // A load is already in flight — join it instead of starting another.
    if (modelsLoading && modelsLoadPromise) {
        return modelsLoadPromise;
    }

    // Already initialized — nothing to do.
    if (modelsLoaded && models) {
        return;
    }

    modelsLoading = true;

    // Disable voice/language pickers for the duration of the load.
    const speakerItemsForLoading = document.querySelectorAll('.speaker-item[data-voice]');
    speakerItemsForLoading.forEach(item => item.classList.add('disabled'));

    const languageItemsForLoading = document.querySelectorAll('.speaker-item[data-language]');
    languageItemsForLoading.forEach(item => item.classList.add('disabled'));

    modelsLoadPromise = (async () => {
        try {
            showDemoStatus('<strong>Loading configuration...</strong>', 'info', 5);

            const basePath = 'assets/onnx';

            cfgs = await loadCfgs(basePath);

            showDemoStatus('<strong>Checking WebGPU support...</strong>', 'info', 8);
            const webgpuCheck = await checkWebGPUSupport();

            const useWebGPU = webgpuCheck.supported;
            const executionProvider = useWebGPU ? 'webgpu' : 'wasm';

            // Warn the user that WASM will be slower than WebGPU.
            if (!useWebGPU) {
                showWasmWarning();
            }

            const backendName = useWebGPU ? 'WebGPU' : 'WASM';
            showDemoStatus(`<strong>${backendName} detected! Loading models...</strong>`, 'info', 10);

            // Kick off ONNX session loading with per-model progress updates
            // mapped onto the 10%-80% band of the status bar.
            const onnxSessionsPromise = loadOnnxAll(basePath, {
                executionProviders: [executionProvider],
                graphOptimizationLevel: 'all'
            }, (modelName, current, total) => {
                const progress = 10 + (current / total) * 70;
                showDemoStatus(`<strong>Loading models with ${backendName} (${current}/${total}):</strong> ${modelName}...`, 'info', progress);
            });

            // Sessions and text processors load in parallel.
            const [loadedModels, loadedProcessors] = await Promise.all([
                onnxSessionsPromise,
                loadProcessors(basePath)
            ]);

            models = loadedModels;
            processors = loadedProcessors;
            showDemoStatus('<strong>Loading reference embeddings...</strong>', 'info', 85);

            // Load the style embeddings for the initially-selected voice.
            const embeddings = await loadStyleEmbeddings(currentVoice);
            currentStyleTtlTensor = embeddings.styleTtl;
            currentStyleDpTensor = embeddings.styleDp;

            showDemoStatus('<strong>Warming up models...</strong>', 'info', 90);

            // Best-effort warmup; it swallows its own errors.
            await warmupModels();

            hideDemoStatus();

            demoGenerateBtn.disabled = false;
            demoTotalSteps.disabled = false;
            demoSpeed.disabled = false;

            // Re-enable the voice toggle labels.
            const voiceToggleTexts = document.querySelectorAll('.voice-toggle-text');
            voiceToggleTexts.forEach(text => text.classList.remove('disabled'));

            // Refresh validation/button state now that models are ready.
            updateCharCounter();

            modelsLoaded = true;
            modelsLoading = false;

            // Re-enable voice and language pickers.
            speakerItemsForLoading.forEach(item => item.classList.remove('disabled'));

            languageItemsForLoading.forEach(item => item.classList.remove('disabled'));

        } catch (error) {
            // Allow a later retry and restore the pickers before surfacing
            // the failure.
            modelsLoading = false;

            speakerItemsForLoading.forEach(item => item.classList.remove('disabled'));

            languageItemsForLoading.forEach(item => item.classList.remove('disabled'));
            showDemoStatus(`<strong>Error:</strong> ${error.message}`, 'error');
            showDemoError(`Failed to initialize: ${error.message}. Check console for details.`);
            throw error;
        }
    })();

    return modelsLoadPromise;
}
|
|
|
|
| |
/**
 * Runs the full Supertonic synthesis pipeline for a single piece of text:
 * duration prediction -> text encoding -> iterative latent denoising ->
 * vocoder, returning raw PCM samples plus timing stats.
 *
 * Relies on module state being initialized beforehand (see initializeModels):
 * `models`, `processors`, `cfgs`, `currentLanguage`, `currentStyleTtlTensor`,
 * `currentStyleDpTensor`.
 *
 * @param {string} text - Text to synthesize.
 * @param {number} totalStep - Number of denoising steps to run.
 * @param {number} durationFactor - Multiplier applied to the predicted
 *   duration (presumably >1 slows speech down — confirm against speedToDurationFactor).
 * @returns {Promise<object>} On success: { success: true, processingTime,
 *   audioDuration, audioData, sampleRate, text }; on failure:
 *   { success: false, error, text }. Never throws — errors are captured
 *   in the returned object.
 */
async function generateSupertonicSpeech(text, totalStep, durationFactor) {
    const supertonicStartTime = Date.now();
    
    try {
        // Batch size is fixed at 1; the arrays below still carry a batch dim.
        const textList = [text];
        const bsz = 1;
        const sampleRate = cfgs.ae.sample_rate;
        
        // Snapshot the current voice's style tensors.
        const styleTtlTensor = currentStyleTtlTensor;
        const styleDpTensor = currentStyleDpTensor;
        
        // Tokenize; the processor also reports characters it cannot map.
        const { textIds, textMask, unsupportedChars } = processors.textProcessor.call(textList, currentLanguage);
        
        // Fail fast on unmappable characters rather than producing garbage.
        if (unsupportedChars && unsupportedChars.length > 0) {
            const charList = unsupportedChars.map(c => `"${c}"`).join(', ');
            throw new Error(`Unsupported characters: ${charList}`);
        }
        
        const textIdsShape = [bsz, textIds[0].length];
        const textMaskShape = [bsz, 1, textMask[0][0].length];
        const textMaskTensor = arrayToTensor(textMask, textMaskShape);
        
        // Duration predictor pass.
        const dpResult = await models.dpOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_dp: styleDpTensor,
            text_mask: textMaskTensor
        });
        
        const durOnnx = Array.from(dpResult.duration.data);
        // Scale predicted durations to honor the requested speaking speed.
        for (let i = 0; i < durOnnx.length; i++) {
            durOnnx[i] *= durationFactor;
        }
        // Reshape to [bsz][1][1] as sampleNoisyLatent expects.
        const durReshaped = [];
        for (let b = 0; b < bsz; b++) {
            durReshaped.push([[durOnnx[b]]]);
        }
        
        // Text encoder pass.
        const textEncResult = await models.textEncOrt.run({
            text_ids: intArrayToTensor(textIds, textIdsShape),
            style_ttl: styleTtlTensor,
            text_mask: textMaskTensor
        });
        
        const textEmbTensor = textEncResult.text_emb;
        
        // Sample the initial noisy latent sized from the predicted durations.
        let { noisyLatent, latentMask } = sampleNoisyLatent(durReshaped, cfgs);
        const latentDim = noisyLatent[0].length;
        const latentLen = noisyLatent[0][0].length;
        const latentShape = [bsz, latentDim, latentLen];
        const latentMaskShape = [bsz, 1, latentMask[0][0].length];
        const latentMaskTensor = arrayToTensor(latentMask, latentMaskShape);
        
        // Flat reusable buffer for the latent, avoiding per-step re-nesting.
        const latentBufferSize = bsz * latentDim * latentLen;
        const latentBuffer = new Float32Array(latentBufferSize);
        
        // Copy the nested latent into the flat buffer once, up front.
        let initIdx = 0;
        for (let b = 0; b < bsz; b++) {
            for (let d = 0; d < latentDim; d++) {
                for (let t = 0; t < latentLen; t++) {
                    latentBuffer[initIdx++] = noisyLatent[b][d][t];
                }
            }
        }
        
        const scalarShape = [bsz];
        const totalStepTensor = arrayToTensor(new Array(bsz).fill(totalStep), scalarShape);
        
        // Pre-build one current_step tensor per denoising step.
        const stepTensors = [];
        for (let step = 0; step < totalStep; step++) {
            stepTensors.push(arrayToTensor(new Array(bsz).fill(step), scalarShape));
        }
        
        // Iterative denoising: each step feeds the previous step's output
        // back in via latentBuffer.
        for (let step = 0; step < totalStep; step++) {
            // A fresh Tensor view over latentBuffer each iteration.
            const noisyLatentTensor = new ort.Tensor('float32', latentBuffer, latentShape);
            
            const vectorEstResult = await models.vectorEstOrt.run({
                noisy_latent: noisyLatentTensor,
                text_emb: textEmbTensor,
                style_ttl: styleTtlTensor,
                text_mask: textMaskTensor,
                latent_mask: latentMaskTensor,
                total_step: totalStepTensor,
                current_step: stepTensors[step]
            });
            
            // Overwrite the buffer in place with the denoised latent.
            const denoisedData = vectorEstResult.denoised_latent.data;
            latentBuffer.set(denoisedData);
        }
        
        // Vocoder pass: latent -> waveform.
        const vocoderResult = await models.vocoderOrt.run({
            latent: new ort.Tensor('float32', latentBuffer, latentShape)
        });
        
        const wavBatch = vocoderResult.wav_tts.data;
        // Trim the padded vocoder output to the predicted duration.
        const wavLen = Math.floor(sampleRate * durOnnx[0]);
        
        const audioData = wavBatch.slice(0, wavLen);
        
        // Timing stats for the UI (processing time vs. audio duration -> RTF).
        const supertonicEndTime = Date.now();
        const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000;
        const audioDurationSec = durOnnx[0];
        
        return {
            success: true,
            processingTime: supertonicProcessingTime,
            audioDuration: audioDurationSec,
            audioData: audioData,
            sampleRate: sampleRate,
            text: text
        };
    } catch (error) {
        // Errors are returned, not thrown, so callers can render them inline.
        return {
            success: false,
            error: error.message,
            text: text
        };
    }
}
|
|
| |
/**
 * Formats a duration in seconds as a zero-padded clock string with
 * hundredths: "SS.cc" under a minute, "MM:SS.cc" under an hour, and
 * "HH:MM:SS.cc" otherwise.
 * @param {number} seconds - Non-negative duration in seconds.
 * @returns {string} Formatted time string.
 */
function formatTimeDetailed(seconds) {
    const pad2 = (n) => n.toString().padStart(2, '0');

    const hours = Math.floor(seconds / 3600);
    const minutes = Math.floor((seconds % 3600) / 60);
    const secondsPart = seconds % 60;
    const wholeSeconds = Math.floor(secondsPart);
    // Hundredths of a second from the fractional remainder.
    const hundredths = Math.floor((secondsPart % 1) * 100);

    const secondsField = `${pad2(wholeSeconds)}.${pad2(hundredths)}`;
    if (seconds >= 3600) {
        return `${pad2(hours)}:${pad2(minutes)}:${secondsField}`;
    }
    if (seconds >= 60) {
        return `${pad2(minutes)}:${secondsField}`;
    }
    return secondsField;
}
|
|
| |
/**
 * Synthesizes long text chunk-by-chunk, invoking callbacks as audio becomes
 * available so playback can start before the whole text is processed. Chunks
 * are joined with a fixed 0.3s silence gap and finally packed into a single
 * WAV blob URL.
 *
 * Fix: the loop-local `const chunkText` shadowed the module-level chunkText()
 * helper called just above it — renamed the local to `chunkStr`.
 *
 * @param {string} text - Full text to synthesize.
 * @param {number} totalStep - Denoising steps per chunk.
 * @param {number} durationFactor - Speed multiplier applied to durations.
 * @param {?function} onFirstChunkReady - Called once with
 *   (audioData, sampleRate, durationSoFar, text, numChunks, firstChunkTime,
 *   processedChars) when the first chunk finishes.
 * @param {?function} onChunkAdded - Called for each later chunk with
 *   (audioData, sampleRate, durationSoFar, chunkIndex1Based, numChunks,
 *   elapsedTime, processedChars).
 * @returns {Promise<object>} On success: { success: true, processingTime,
 *   audioDuration, url, text, firstChunkTime }; on failure:
 *   { success: false, error, text }. Never throws.
 */
async function generateSupertonicSpeechChunked(text, totalStep, durationFactor, onFirstChunkReady, onChunkAdded) {
    const supertonicStartTime = Date.now();
    const sampleRate = cfgs.ae.sample_rate;
    const silenceDuration = 0.3;

    try {
        // Split the text into synthesizable chunks.
        const chunks = chunkText(text);

        const audioDataArrays = [];
        const durations = [];
        const silenceSamples = Math.floor(silenceDuration * sampleRate);
        let firstChunkEndTime = 0;
        let firstChunkTime = 0;

        // Chunks are generated sequentially; each one feeds the callbacks.
        for (let i = 0; i < chunks.length; i++) {
            const chunkStr = chunks[i];

            const result = await generateSupertonicSpeech(chunkStr, totalStep, durationFactor);

            if (!result.success) {
                throw new Error(`Failed to generate chunk ${i + 1}: ${result.error}`);
            }

            const audioData = result.audioData;

            audioDataArrays.push(audioData);
            durations.push(result.audioDuration);

            if (i === 0 && onFirstChunkReady) {
                // Time-to-first-audio is reported separately from total time.
                firstChunkEndTime = Date.now();
                firstChunkTime = (firstChunkEndTime - supertonicStartTime) / 1000;

                const totalDurationSoFar = result.audioDuration;
                const processedChars = chunks[0].length;

                onFirstChunkReady(audioData, sampleRate, totalDurationSoFar, text, chunks.length, firstChunkTime, processedChars);
            } else if (i > 0 && onChunkAdded) {
                // Running totals include the inter-chunk silence gaps.
                const totalDurationSoFar = durations.slice(0, i + 1).reduce((sum, dur) => sum + dur, 0) + silenceDuration * i;
                const currentProcessingTime = (Date.now() - supertonicStartTime) / 1000;
                const processedChars = chunks.slice(0, i + 1).reduce((sum, chunk) => sum + chunk.length, 0);

                onChunkAdded(audioData, sampleRate, totalDurationSoFar, i + 1, chunks.length, currentProcessingTime, processedChars);
            }
        }

        // Total duration: all chunks plus one silence gap between each pair.
        const totalDuration = durations.reduce((sum, dur) => sum + dur, 0) + silenceDuration * (chunks.length - 1);

        // Size the concatenated buffer (silence after every chunk but the last).
        let totalSamples = 0;
        for (let i = 0; i < audioDataArrays.length; i++) {
            totalSamples += audioDataArrays[i].length;
            if (i < audioDataArrays.length - 1) {
                totalSamples += silenceSamples;
            }
        }

        const wavCat = new Float32Array(totalSamples);

        let currentIdx = 0;
        for (let i = 0; i < audioDataArrays.length; i++) {
            const audioData = audioDataArrays[i];
            wavCat.set(audioData, currentIdx);
            currentIdx += audioData.length;

            // Skip over the gap; Float32Array is zero-initialized, so the
            // untouched samples are already silence.
            if (i < audioDataArrays.length - 1) {
                currentIdx += silenceSamples;
            }
        }

        // Pack everything into a downloadable WAV blob URL.
        const wavBuffer = writeWavFile(wavCat, sampleRate);
        const blob = new Blob([wavBuffer], { type: 'audio/wav' });
        const url = URL.createObjectURL(blob);

        const supertonicEndTime = Date.now();
        const supertonicProcessingTime = (supertonicEndTime - supertonicStartTime) / 1000;

        return {
            success: true,
            processingTime: supertonicProcessingTime,
            audioDuration: totalDuration,
            url: url,
            text: text,
            firstChunkTime: firstChunkTime
        };
    } catch (error) {
        // Errors are returned, not thrown, so callers can render them inline.
        return {
            success: false,
            error: error.message,
            text: text
        };
    }
}
|
|
| |
| async function generateSpeech() { |
| let text = (demoTextInput.textContent || demoTextInput.innerText || '').trim(); |
| |
| |
| const validation = validateTextInput(text); |
| if (!validation.valid) { |
| showDemoError(validation.message); |
| return; |
| } |
| |
| if (!models || !cfgs || !processors) { |
| showDemoError('Models are still loading. Please wait.'); |
| return; |
| } |
| |
| if (!currentStyleTtlTensor || !currentStyleDpTensor) { |
| showDemoError('Reference embeddings are not ready. Please wait.'); |
| return; |
| } |
| |
| |
| const charValidation = validateCharacters(text); |
| if (!charValidation.valid && charValidation.unsupportedChars.length > 0) { |
| const charList = charValidation.unsupportedChars.map(c => `"${c}"`).join(', '); |
| showDemoError(`Cannot generate speech: Unsupported characters found: ${charList}`); |
| return; |
| } |
|
|
| currentGenerationTextLength = text.length; |
| |
| try { |
| isGenerating = true; |
| demoGenerateBtn.disabled = true; |
| |
| |
| const speakerItemsForGeneration = document.querySelectorAll('.speaker-item[data-voice]'); |
| speakerItemsForGeneration.forEach(item => item.classList.add('disabled')); |
| |
| |
| const languageItemsForGeneration = document.querySelectorAll('.speaker-item[data-language]'); |
| languageItemsForGeneration.forEach(item => item.classList.add('disabled')); |
| |
| hideDemoError(); |
| hideDemoStatus(); |
| |
| |
| if (audioContext) { |
| |
| scheduledSources.forEach(source => { |
| try { |
| source.stop(); |
| } catch (e) { |
| |
| } |
| }); |
| scheduledSources = []; |
| |
| |
| if (audioContext.state !== 'closed') { |
| audioContext.close(); |
| } |
| audioContext = null; |
| } |
| |
| |
| if (animationFrameId) { |
| cancelAnimationFrame(animationFrameId); |
| animationFrameId = null; |
| } |
| |
| |
| customAudioPlayers.forEach(player => { |
| if (player.cleanup) { |
| player.cleanup(); |
| } |
| }); |
| customAudioPlayers = []; |
| |
| |
| audioChunks = []; |
| totalDuration = 0; |
| startTime = 0; |
| pauseTime = 0; |
| isPaused = false; |
| isPlaying = false; |
| firstChunkGenerationTime = 0; |
| totalChunks = 0; |
| nextScheduledTime = 0; |
| |
| |
| const createInitialResultItem = (system, titleMain, titleSub, titleColor, includeStatus) => { |
| const titleStatus = includeStatus |
| ? `<span class="title-status status-running" id="${system}-status">⏳ Running...</span>` |
| : ''; |
| return ` |
| <div class="demo-result-item ${system}-result-item generating" id="${system}-result" style="--result-progress: 0%;"> |
| <div class="demo-result-title"> |
| <span class="title-main" style="color: ${titleColor};">${titleMain}</span> |
| <span class="title-sub">${titleSub}</span> |
| ${titleStatus} |
| </div> |
| <div class="demo-result-info"> |
| <!-- |
| <div class="stat"> |
| <div class="stat-value" id="${system}-chars">--</div> |
| <div class="stat-label">Processed Chars</div> |
| </div> |
| --> |
| <div class="stat"> |
| <div class="stat-value" id="${system}-time">--</div> |
| <div class="stat-label">Processing Time<span class="stat-arrow stat-arrow--down">↓</span></div> |
| </div> |
| <div class="stat"> |
| <div class="stat-value" id="${system}-cps">--</div> |
| <div class="stat-label">Chars/sec<span class="stat-arrow stat-arrow--up">↑</span></div> |
| </div> |
| <div class="stat"> |
| <div class="stat-value" id="${system}-rtf">--</div> |
| <div class="stat-label">RTF<span class="stat-arrow stat-arrow--down">↓</span></div> |
| </div> |
| </div> |
| <div class="custom-audio-player"> |
| <div class="demo-placeholder-audio">Generating speech...</div> |
| </div> |
| </div> |
| `; |
| }; |
| const supertonicInitial = createInitialResultItem( |
| 'supertonic', |
| 'Supertonic', |
| 'On-Device', |
| 'var(--accent-yellow)', |
| false |
| ); |
| demoResults.style.display = 'flex'; |
| demoResults.innerHTML = supertonicInitial; |
| |
| const totalStep = parseInt(demoTotalSteps.value); |
| const speed = parseFloat(demoSpeed.value); |
| const durationFactor = speedToDurationFactor(speed); |
| |
| |
| let latestSupertonicProcessedChars = 0; |
| |
| |
| const formatTime = (seconds, { trimMobile = false } = {}) => { |
| const mins = Math.floor(seconds / 60); |
| const secs = seconds % 60; |
| const secString = secs.toFixed(2).padStart(5, '0'); |
| let formatted = `${mins}:${secString}`; |
| if (trimMobile) { |
| formatted = trimDecimalsForMobile(formatted); |
| } |
| return formatted; |
| }; |
| |
| const updateProgress = () => { |
| if (!isPlaying || !audioContext) return; |
| |
| const currentTime = isPaused ? pauseTime : (audioContext.currentTime - startTime); |
| const progress = totalDuration > 0 ? (currentTime / totalDuration) * 100 : 0; |
| |
| if (progressFill) { |
| progressFill.style.width = `${Math.min(progress, 100)}%`; |
| } |
| if (currentTimeDisplay) { |
| currentTimeDisplay.textContent = formatTime(Math.min(currentTime, totalDuration), { trimMobile: true }); |
| } |
| |
| if (currentTime < totalDuration) { |
| animationFrameId = requestAnimationFrame(updateProgress); |
| } else { |
| |
| isPlaying = false; |
| isPaused = false; |
| if (playPauseBtn) { |
| playPauseBtn.innerHTML = PLAY_ICON_SVG; |
| } |
| } |
| }; |
| |
| const togglePlayPause = () => { |
| if (!audioContext || audioChunks.length === 0) return; |
| |
| if (isPaused) { |
| |
| pauseAllPlayersExcept(supertonicPlayerRecord); |
| |
| const seekTime = pauseTime; |
| |
| |
| let accumulatedTime = 0; |
| let startChunkIndex = 0; |
| let offsetInChunk = seekTime; |
| |
| for (let i = 0; i < audioChunks.length; i++) { |
| const chunkDuration = audioChunks[i].buffer.duration; |
| if (accumulatedTime + chunkDuration > seekTime) { |
| startChunkIndex = i; |
| offsetInChunk = seekTime - accumulatedTime; |
| break; |
| } |
| accumulatedTime += chunkDuration + 0.3; |
| } |
| |
| |
| scheduledSources.forEach(source => { |
| try { |
| source.stop(); |
| } catch (e) { |
| |
| } |
| }); |
| scheduledSources = []; |
| |
| |
| if (audioContext.state === 'suspended') { |
| audioContext.resume(); |
| } |
| |
| |
| startTime = audioContext.currentTime - seekTime; |
| let nextStartTime = audioContext.currentTime; |
| |
| for (let i = startChunkIndex; i < audioChunks.length; i++) { |
| const source = audioContext.createBufferSource(); |
| source.buffer = audioChunks[i].buffer; |
| source.connect(audioContext.destination); |
| |
| if (i === startChunkIndex) { |
| source.start(nextStartTime, offsetInChunk); |
| nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); |
| } else { |
| source.start(nextStartTime); |
| nextStartTime += audioChunks[i].buffer.duration; |
| } |
| |
| if (i < audioChunks.length - 1) { |
| nextStartTime += 0.3; |
| } |
| |
| scheduledSources.push(source); |
| } |
| |
| nextScheduledTime = nextStartTime; |
| |
| isPaused = false; |
| isPlaying = true; |
| playPauseBtn.innerHTML = PAUSE_ICON_SVG; |
| updateProgress(); |
| } else if (isPlaying) { |
| |
| pauseTime = audioContext.currentTime - startTime; |
| audioContext.suspend(); |
| isPaused = true; |
| playPauseBtn.innerHTML = PLAY_ICON_SVG; |
| if (animationFrameId) { |
| cancelAnimationFrame(animationFrameId); |
| } |
| } else { |
| |
| pauseAllPlayersExcept(supertonicPlayerRecord); |
| |
| pauseTime = 0; |
| |
| |
| if (audioContext.state === 'suspended') { |
| audioContext.resume(); |
| } |
| |
| |
| scheduledSources.forEach(source => { |
| try { |
| source.stop(); |
| } catch (e) { |
| |
| } |
| }); |
| scheduledSources = []; |
| |
| |
| startTime = audioContext.currentTime; |
| let nextStartTime = audioContext.currentTime; |
| |
| for (let i = 0; i < audioChunks.length; i++) { |
| const source = audioContext.createBufferSource(); |
| source.buffer = audioChunks[i].buffer; |
| source.connect(audioContext.destination); |
| source.start(nextStartTime); |
| nextStartTime += audioChunks[i].buffer.duration; |
| |
| if (i < audioChunks.length - 1) { |
| nextStartTime += 0.3; |
| } |
| |
| scheduledSources.push(source); |
| } |
| |
| nextScheduledTime = nextStartTime; |
| |
| isPlaying = true; |
| isPaused = false; |
| playPauseBtn.innerHTML = PAUSE_ICON_SVG; |
| updateProgress(); |
| } |
| }; |
| |
| const seekTo = (percentage) => { |
| if (!audioContext || audioChunks.length === 0) return; |
| |
| const seekTime = (percentage / 100) * totalDuration; |
| |
| |
| const wasPlaying = isPlaying; |
| const wasPaused = isPaused; |
| |
| |
| scheduledSources.forEach(source => { |
| try { |
| source.stop(); |
| } catch (e) { |
| |
| } |
| }); |
| scheduledSources = []; |
| |
| |
| if (animationFrameId) { |
| cancelAnimationFrame(animationFrameId); |
| } |
| |
| |
| let accumulatedTime = 0; |
| let startChunkIndex = 0; |
| let offsetInChunk = seekTime; |
| |
| for (let i = 0; i < audioChunks.length; i++) { |
| const chunkDuration = audioChunks[i].buffer.duration; |
| if (accumulatedTime + chunkDuration > seekTime) { |
| startChunkIndex = i; |
| offsetInChunk = seekTime - accumulatedTime; |
| break; |
| } |
| accumulatedTime += chunkDuration + 0.3; |
| } |
| |
| |
| if (wasPaused || !wasPlaying) { |
| pauseTime = seekTime; |
| |
| |
| if (progressFill) { |
| const progress = (seekTime / totalDuration) * 100; |
| progressFill.style.width = `${Math.min(progress, 100)}%`; |
| } |
| if (currentTimeDisplay) { |
| currentTimeDisplay.textContent = formatTime(seekTime, { trimMobile: true }); |
| } |
| |
| |
| isPaused = true; |
| isPlaying = true; |
| |
| if (playPauseBtn) { |
| playPauseBtn.innerHTML = PLAY_ICON_SVG; |
| } |
| |
| return; |
| } |
| |
| |
| if (audioContext.state === 'suspended') { |
| audioContext.resume(); |
| } |
| |
| |
| startTime = audioContext.currentTime - seekTime; |
| let nextStartTime = audioContext.currentTime; |
| |
| for (let i = startChunkIndex; i < audioChunks.length; i++) { |
| const source = audioContext.createBufferSource(); |
| source.buffer = audioChunks[i].buffer; |
| source.connect(audioContext.destination); |
| |
| if (i === startChunkIndex) { |
| |
| source.start(nextStartTime, offsetInChunk); |
| nextStartTime += (audioChunks[i].buffer.duration - offsetInChunk); |
| } else { |
| source.start(nextStartTime); |
| nextStartTime += audioChunks[i].buffer.duration; |
| } |
| |
| |
| if (i < audioChunks.length - 1) { |
| nextStartTime += 0.3; |
| } |
| |
| scheduledSources.push(source); |
| } |
| |
| |
| nextScheduledTime = nextStartTime; |
| |
| |
| isPlaying = true; |
| isPaused = false; |
| if (playPauseBtn) { |
| playPauseBtn.innerHTML = PAUSE_ICON_SVG; |
| } |
| |
| |
| updateProgress(); |
| }; |
| |
| |
| |
| const createAudioBufferFromFloat32 = (audioData, sampleRate) => { |
| const audioBuffer = audioContext.createBuffer(1, audioData.length, sampleRate); |
| audioBuffer.getChannelData(0).set(audioData); |
| return audioBuffer; |
| }; |
| |
| const onFirstChunkReady = async (audioData, sampleRate, duration, text, numChunks, firstChunkTime, processedChars) => { |
| totalChunks = numChunks; |
| firstChunkGenerationTime = firstChunkTime; |
| |
| const container = document.getElementById('demoResults'); |
| |
|
|
| const textLength = currentGenerationTextLength > 0 |
| ? currentGenerationTextLength |
| : (text ? text.length : 0); |
| const isBatch = textLength >= getMaxChunkLength(); |
| const processingTimeStr = isBatch && firstChunkTime |
| ? `${formatTimeDetailed(firstChunkTime)} / ${formatTimeDetailed(firstChunkTime)}` |
| : formatTimeDetailed(firstChunkTime); |
| const safeInitialChars = typeof processedChars === 'number' ? processedChars : 0; |
| const displayedInitialChars = textLength > 0 ? Math.min(safeInitialChars, textLength) : safeInitialChars; |
| const charsPerSec = firstChunkTime > 0 && displayedInitialChars > 0 |
| ? (displayedInitialChars / firstChunkTime).toFixed(1) |
| : '0.0'; |
| const rtf = duration > 0 && firstChunkTime > 0 ? (firstChunkTime / duration).toFixed(3) : '-'; |
| const progressValue = textLength > 0 ? Math.min(100, (displayedInitialChars / textLength) * 100) : 0; |
|
|
| const resultItemEl = document.getElementById('supertonic-result'); |
| if (!resultItemEl) { |
| console.warn('Supertonic result container not found.'); |
| return; |
| } |
|
|
| resultItemEl.classList.remove('generating'); |
| resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); |
|
|
| const titleMainEl = resultItemEl.querySelector('.title-main'); |
| if (titleMainEl) { |
| titleMainEl.textContent = 'Supertonic'; |
| titleMainEl.style.color = 'var(--accent-yellow)'; |
| } |
| const titleSubEl = resultItemEl.querySelector('.title-sub'); |
| if (titleSubEl) { |
| titleSubEl.textContent = 'On-Device'; |
| } |
|
|
| const infoContainer = resultItemEl.querySelector('.demo-result-info'); |
| if (infoContainer) { |
| infoContainer.classList.remove('error'); |
| } |
| const timeElInitial = document.getElementById('supertonic-time'); |
| if (timeElInitial) { |
| timeElInitial.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true }); |
| } |
| const cpsElInitial = document.getElementById('supertonic-cps'); |
| if (cpsElInitial) { |
| cpsElInitial.textContent = charsPerSec; |
| } |
| const rtfElInitial = document.getElementById('supertonic-rtf'); |
| if (rtfElInitial) { |
| rtfElInitial.innerHTML = formatStatValueWithSuffix(rtf, 'x'); |
| } |
|
|
| const playerContainer = resultItemEl.querySelector('.custom-audio-player'); |
| if (playerContainer) { |
| playerContainer.style.display = ''; |
| playerContainer.innerHTML = ` |
| <button id="play-pause-btn" class="player-btn">${PAUSE_ICON_SVG}</button> |
| <div class="time-display" id="current-time">0:00.00</div> |
| <div class="progress-container" id="progress-container"> |
| <div class="progress-bar"> |
| <div class="progress-fill" id="progress-fill"></div> |
| </div> |
| </div> |
| <div class="time-display" id="total-duration">${formatTime(duration, { trimMobile: true })}</div> |
| <div class="demo-result-actions" style="display: none;"> |
| <button class="demo-download-btn" id="supertonic-download" aria-label="Download WAV" title="Download WAV"> |
| <svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" viewBox="0 0 24 24"> |
| <path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/> |
| <polyline points="7 10 12 15 17 10"/> |
| <line x1="12" y1="15" x2="12" y2="3"/> |
| </svg> |
| </button> |
| </div> |
| `; |
| } |
|
|
| container.style.display = 'flex'; |
| latestSupertonicProcessedChars = displayedInitialChars; |
| |
| |
| playPauseBtn = document.getElementById('play-pause-btn'); |
| progressBar = document.getElementById('progress-container'); |
| currentTimeDisplay = document.getElementById('current-time'); |
| durationDisplay = document.getElementById('total-duration'); |
| progressFill = document.getElementById('progress-fill'); |
| |
| |
| audioContext = new (window.AudioContext || window.webkitAudioContext)(); |
| startTime = audioContext.currentTime; |
| totalDuration = duration; |
| isPlaying = true; |
| isPaused = false; |
| |
| |
| const pausePlayback = () => { |
| if (!audioContext || audioContext.state === 'closed') return; |
| if (isPlaying) { |
| pauseTime = audioContext.currentTime - startTime; |
| scheduledSources.forEach(source => { |
| try { |
| source.stop(); |
| } catch (e) { |
| |
| } |
| }); |
| scheduledSources = []; |
| audioContext.suspend(); |
| isPaused = true; |
| isPlaying = false; |
| if (playPauseBtn) { |
| playPauseBtn.innerHTML = PLAY_ICON_SVG; |
| } |
| if (animationFrameId) { |
| cancelAnimationFrame(animationFrameId); |
| } |
| } |
| }; |
| |
| supertonicPlayerRecord = { |
| audioContext: audioContext, |
| pausePlayback: pausePlayback |
| }; |
| |
| |
| customAudioPlayers = customAudioPlayers.filter(p => p !== supertonicPlayerRecord && p.audioContext !== audioContext); |
| customAudioPlayers.push(supertonicPlayerRecord); |
| |
| |
| pauseAllPlayersExcept(supertonicPlayerRecord); |
| |
| |
| const audioBuffer = createAudioBufferFromFloat32(audioData, sampleRate); |
| |
| audioChunks.push({ buffer: audioBuffer, duration: audioBuffer.duration }); |
| |
| |
| const source = audioContext.createBufferSource(); |
| source.buffer = audioBuffer; |
| source.connect(audioContext.destination); |
| source.start(audioContext.currentTime); |
| scheduledSources.push(source); |
| |
| |
| nextScheduledTime = audioContext.currentTime + audioBuffer.duration + 0.3; |
| |
| |
| playPauseBtn.addEventListener('click', togglePlayPause); |
| |
| progressBar.addEventListener('click', (e) => { |
| const rect = progressBar.getBoundingClientRect(); |
| const percentage = ((e.clientX - rect.left) / rect.width) * 100; |
| seekTo(percentage); |
| }); |
| |
| |
| updateProgress(); |
| }; |
| |
| |
// Streaming callback for every audio chunk after the first: decode the raw
// Float32 samples, queue them on the shared AudioContext timeline so playback
// continues seamlessly, and refresh the live stat readouts (duration,
// generation time, chars/sec, RTF, progress bar).
const onChunkAdded = async (audioData, sampleRate, duration, chunkIndex, totalChunks, currentProcessingTime, processedChars) => {
  // Player never initialized (or already torn down) — nothing to schedule into.
  if (!audioContext) return;

  const audioBuffer = createAudioBufferFromFloat32(audioData, sampleRate);

  const chunkDuration = audioBuffer.duration;
  audioChunks.push({ buffer: audioBuffer, duration: chunkDuration });

  // Schedule this chunk at the previously reserved start time so it begins
  // right after the earlier chunks finish.
  const source = audioContext.createBufferSource();
  source.buffer = audioBuffer;
  source.connect(audioContext.destination);
  source.start(nextScheduledTime);
  scheduledSources.push(source);

  // Reserve the next slot: this chunk's length plus a fixed 0.3 s gap
  // (same spacing the first chunk used).
  nextScheduledTime = nextScheduledTime + audioBuffer.duration + 0.3;

  // `duration` appears to be the cumulative audio length so far, not just
  // this chunk's — TODO(review): confirm against the generator's contract.
  totalDuration = duration;

  // Briefly flash the total-duration readout white to signal the update.
  if (durationDisplay) {
    durationDisplay.textContent = formatTime(duration, { trimMobile: true });
    durationDisplay.style.transition = 'color 0.3s';
    durationDisplay.style.color = '#ffffff';
    setTimeout(() => {
      durationDisplay.style.color = '';
    }, 300);
  }

  // Prefer the text length captured at generation start; fall back to the
  // live contents of the editable input.
  const textLengthCandidate = currentGenerationTextLength > 0
    ? currentGenerationTextLength
    : (demoTextInput.textContent || demoTextInput.innerText || '').trim().length;
  const textLength = textLengthCandidate;
  // "Batch" mode: text long enough to have been split into multiple chunks.
  const isBatch = textLength >= getMaxChunkLength();
  const timeEl = document.getElementById('supertonic-time');
  const durationEl = document.getElementById('supertonic-duration');
  const cpsEl = document.getElementById('supertonic-cps');
  const rtfEl = document.getElementById('supertonic-rtf');
  const effectiveProcessedChars = typeof processedChars === 'number' ? processedChars : latestSupertonicProcessedChars;

  // Drop stale / out-of-order progress callbacks so stats never move backwards.
  if (effectiveProcessedChars < latestSupertonicProcessedChars) {
    return;
  }

  // Clamp progress to the full text length and convert to a 0–100 percentage.
  const clampedProcessedChars = textLength > 0 ? Math.min(effectiveProcessedChars, textLength) : effectiveProcessedChars;
  const progressValue = textLength > 0 ? Math.min(100, (clampedProcessedChars / textLength) * 100) : 0;
  if (durationEl) {
    durationEl.textContent = formatTimeDetailed(duration);
  }
  // Batch mode shows "first-chunk time / total time so far".
  if (timeEl && isBatch && firstChunkGenerationTime > 0 && currentProcessingTime) {
    const timeDisplay = `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(currentProcessingTime)}`;
    timeEl.innerHTML = formatStatValueWithSuffix(timeDisplay, 's', { firstLabel: true });
  }
  if (cpsEl && currentProcessingTime > 0 && clampedProcessedChars >= 0) {
    const charsPerSec = (clampedProcessedChars / currentProcessingTime).toFixed(1);
    cpsEl.textContent = charsPerSec;
  }
  // Real-time factor: generation time / audio duration (lower is faster).
  if (rtfEl && duration > 0 && currentProcessingTime > 0) {
    const rtf = (currentProcessingTime / duration).toFixed(3);
    rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x');
  }
  // Drive the CSS progress indicator on the result card.
  const resultItemEl = document.getElementById('supertonic-result');
  if (resultItemEl) {
    resultItemEl.style.setProperty('--result-progress', `${progressValue}%`);
  }
  latestSupertonicProcessedChars = clampedProcessedChars;
};
| |
| |
| const result = await generateSupertonicSpeechChunked( |
| text, |
| totalStep, |
| durationFactor, |
| onFirstChunkReady, |
| onChunkAdded |
| ); |
| |
| if (result.success) { |
| const textLength = result.text ? result.text.length : 0; |
| const isBatch = textLength >= getMaxChunkLength(); |
| const processingTimeStr = isBatch && firstChunkGenerationTime > 0 |
| ? `${formatTimeDetailed(firstChunkGenerationTime)} / ${formatTimeDetailed(result.processingTime)}` |
| : formatTimeDetailed(result.processingTime); |
| const charsPerSec = result.processingTime > 0 ? (textLength / result.processingTime).toFixed(1) : '0.0'; |
| const progressValue = textLength > 0 ? 100 : 0; |
| |
| const timeEl = document.getElementById('supertonic-time'); |
| const durationEl = document.getElementById('supertonic-duration'); |
| const cpsEl = document.getElementById('supertonic-cps'); |
| const rtfEl = document.getElementById('supertonic-rtf'); |
| |
| if (timeEl) timeEl.innerHTML = formatStatValueWithSuffix(processingTimeStr, 's', { firstLabel: true }); |
| if (durationEl) durationEl.textContent = formatTimeDetailed(result.audioDuration); |
| latestSupertonicProcessedChars = textLength; |
| if (cpsEl) cpsEl.textContent = charsPerSec; |
| if (rtfEl) { |
| const rtf = result.audioDuration > 0 ? (result.processingTime / result.audioDuration).toFixed(3) : '-'; |
| rtfEl.innerHTML = formatStatValueWithSuffix(rtf, 'x'); |
| } |
| const resultItemEl = document.getElementById('supertonic-result'); |
| if (resultItemEl) { |
| resultItemEl.style.setProperty('--result-progress', `${progressValue}%`); |
| } |
| |
| |
| if (audioContext && audioChunks.length > 0) { |
| totalDuration = result.audioDuration; |
| if (durationDisplay) { |
| durationDisplay.textContent = formatTime(result.audioDuration, { trimMobile: true }); |
| } |
| } |
| |
| |
| const downloadBtn = document.getElementById('supertonic-download'); |
| if (downloadBtn) { |
| downloadBtn.parentElement.style.display = 'block'; |
| downloadBtn.onclick = () => downloadDemoAudio(result.url, 'supertonic_speech.wav'); |
| } |
| } |
| |
| } catch (error) { |
| showDemoStatus(`<strong>Error:</strong> ${error.message}`, 'error'); |
| showDemoError(`Error during synthesis: ${error.message}`); |
| console.error('Synthesis error:', error); |
| |
| |
| demoResults.style.display = 'none'; |
| demoResults.innerHTML = ` |
| <div class="demo-placeholder"> |
| <div class="demo-placeholder-icon">🎙️</div> |
| <p>Your generated speech will appear here</p> |
| </div> |
| `; |
| } finally { |
| isGenerating = false; |
| demoGenerateBtn.disabled = false; |
| |
| |
| const speakerItemsForGeneration = document.querySelectorAll('.speaker-item[data-voice]'); |
| speakerItemsForGeneration.forEach(item => item.classList.remove('disabled')); |
| |
| |
| const languageItemsForGeneration = document.querySelectorAll('.speaker-item[data-language]'); |
| languageItemsForGeneration.forEach(item => item.classList.remove('disabled')); |
| } |
| } |
|
|
| |
// Trigger a browser download of `url`, saved as `filename`, by clicking a
// transient (never-attached) anchor element.
window.downloadDemoAudio = function(url, filename) {
  const anchor = Object.assign(document.createElement('a'), {
    href: url,
    download: filename,
  });
  anchor.click();
};
|
|
| |
// Convert the UI speed multiplier into the model's duration factor
// (inverse relationship: faster speech → shorter durations). The small
// default offset keeps the division finite as speed approaches zero.
function speedToDurationFactor(speed, offset = 0.05) {
  const effectiveSpeed = speed + offset;
  return 1 / effectiveSpeed;
}
|
|
| |
// Sync the numeric labels next to the two sliders with their current values.
function updateSliderValues() {
  const steps = demoTotalSteps.value;
  demoTotalStepsValue.textContent = `${steps} Steps`;

  const speed = parseFloat(demoSpeed.value);
  demoSpeedValue.textContent = `${speed.toFixed(2)}x`;
}
| |
| |
// Keep the slider labels live while dragging.
demoTotalSteps.addEventListener('input', updateSliderValues);
demoSpeed.addEventListener('input', updateSliderValues);

// Initialize labels once on load.
updateSliderValues();


// Main entry point: Generate button kicks off synthesis.
demoGenerateBtn.addEventListener('click', generateSpeech);
|
|
| |
// Preset chips above the text box; 'quote' is the default selection.
const presetItems = document.querySelectorAll('.preset-item[data-preset]');
const freeformBtn = document.getElementById('freeformBtn');
let currentPreset = 'quote';

// Guards the input handler so programmatic text swaps don't flip the UI
// into freeform mode.
let isPresetChanging = false;
| |
| |
// Highlight the preset chip for `presetType` (clear all when falsy), record
// it as the current preset, and sync the quote-mode styling on the results.
function updateActiveButton(presetType) {
  for (const item of presetItems) {
    item.classList.remove('active');
  }

  if (presetType) {
    const targetItem = document.querySelector(`.preset-item[data-preset="${presetType}"]`);
    targetItem?.classList.add('active');
  }

  currentPreset = presetType;
  updateQuoteModeState(presetType === 'quote');
}
|
|
// Mirror whether the quote preset is active onto the results panel styling.
function updateQuoteModeState(isQuote) {
  if (!demoResults) {
    return;
  }
  demoResults.classList.toggle('quote-mode', !!isQuote);
}
| |
| |
// Start on the quote preset and, when a localized quote exists, seed the input.
updateActiveButton('quote');
if (presetTexts.quote && typeof presetTexts.quote === 'object' && presetTexts.quote[currentLanguage]) {
  demoTextInput.textContent = presetTexts.quote[currentLanguage];
  updateCharCounter();
}
| |
// Wire each preset chip: clicking loads its (localized) text into the input.
// All three branches previously duplicated the same set-text / guard / refresh
// sequence; `applyPresetText` centralizes it so the isPresetChanging guard
// cannot drift out of sync between branches. Behavior is unchanged.
presetItems.forEach(item => {
  item.addEventListener('click', () => {
    const presetType = item.getAttribute('data-preset');

    // Swap the text while suppressing the input handler's freeform fallback.
    const applyPresetText = (text) => {
      isPresetChanging = true;
      demoTextInput.textContent = text;
      updateCharCounter();
      updateActiveButton(presetType);
      isPresetChanging = false;
    };

    if (presetType === 'freeform') {
      // Freeform clears the box for the user's own text.
      applyPresetText('');
    } else {
      const preset = presetTexts[presetType];
      if (preset && typeof preset === 'object' && preset[currentLanguage]) {
        // Localized preset: pick the entry for the active language.
        applyPresetText(preset[currentLanguage]);
      } else if (preset && typeof preset === 'string') {
        // Legacy single-string preset (no per-language variants).
        applyPresetText(preset);
      }
    }
  });
});
|
|
| |
// Force plain-text paste: the contenteditable input would otherwise keep
// rich markup from the clipboard.
demoTextInput.addEventListener('paste', (e) => {
  e.preventDefault();
  // `window.clipboardData` is the legacy IE fallback.
  const text = (e.clipboardData || window.clipboardData).getData('text/plain');
  const selection = window.getSelection();
  if (!selection.rangeCount) return;

  // Replace the current selection with the pasted text and move the caret
  // to just after the inserted node.
  const range = selection.getRangeAt(0);
  range.deleteContents();
  const textNode = document.createTextNode(text);
  range.insertNode(textNode);
  range.setStartAfter(textNode);
  range.collapse(true);
  selection.removeAllRanges();
  selection.addRange(range);

  // Re-fire 'input' so the char counter and freeform detection run.
  demoTextInput.dispatchEvent(new Event('input', { bubbles: true }));
});
|
|
| |
// Snapshot of the input text, used to tell real user edits apart from
// programmatic preset swaps.
let previousTextValue = demoTextInput.textContent || demoTextInput.innerText || '';

const demoInputSection = document.querySelector('.demo-input-section');
// Expose the input section's current height as a CSS custom property so the
// decorative left border can track it in CSS.
function updateLeftBorderHeight() {
  if (demoInputSection) {
    const height = demoInputSection.offsetHeight;
    demoInputSection.style.setProperty('--demo-text-input-height', `${height}px`);
  }
}

// Measure once now, then keep the CSS variable in sync as the section resizes.
updateLeftBorderHeight();
const resizeObserver = new ResizeObserver(() => {
  updateLeftBorderHeight();
});
if (demoInputSection) {
  resizeObserver.observe(demoInputSection);
}
| |
| |
// Size the text input so the whole demo fits the viewport on desktop:
// sum the heights/paddings of every other demo element, subtract from the
// viewport height, and give the remainder to the input (with a 200px floor).
// On mobile (<=768px) the CSS default height is used instead.
function calculateTextInputHeight() {
  if (window.innerWidth <= 768) {
    // Mobile: let the stylesheet control the height.
    demoTextInput.style.height = '';
    return;
  }

  const viewportHeight = window.innerHeight;
  const interactiveDemoEl = document.querySelector('.interactive-demo');
  const containerEl = document.querySelector('.container');
  const headerWrapperEl = document.querySelector('.demo-header-wrapper');
  const controlsEl = document.querySelector('.demo-controls');
  const inputLabelEl = document.querySelector('.demo-input-label');
  const presetRowEl = document.querySelector('#presetControlsRow');
  const outputSectionEl = document.querySelector('.demo-output-section');
  const contentEl = document.querySelector('.demo-content');

  // Fall back to body so getComputedStyle never throws on a missing element.
  const interactiveDemoStyle = window.getComputedStyle(interactiveDemoEl || document.body);
  const containerStyle = window.getComputedStyle(containerEl || document.body);
  const contentStyle = window.getComputedStyle(contentEl || document.body);

  // Running total of all vertical space consumed by non-input elements.
  let totalHeight = 0;

  // Vertical padding of the outer .interactive-demo wrapper.
  const interactiveDemoPaddingTop = parseFloat(interactiveDemoStyle.paddingTop) || 0;
  const interactiveDemoPaddingBottom = parseFloat(interactiveDemoStyle.paddingBottom) || 0;
  totalHeight += interactiveDemoPaddingTop + interactiveDemoPaddingBottom;

  // Vertical padding of the .container wrapper.
  const containerPaddingTop = parseFloat(containerStyle.paddingTop) || 0;
  const containerPaddingBottom = parseFloat(containerStyle.paddingBottom) || 0;
  totalHeight += containerPaddingTop + containerPaddingBottom;

  if (headerWrapperEl) {
    totalHeight += headerWrapperEl.offsetHeight;
  }

  if (controlsEl) {
    totalHeight += controlsEl.offsetHeight;
  }

  // Flex gap between .demo-content children (counted twice: once between
  // controls and input, once between input and output).
  const contentGap = parseFloat(contentStyle.gap) || 0;
  totalHeight += contentGap;

  if (inputLabelEl) {
    totalHeight += inputLabelEl.offsetHeight;
  }

  if (presetRowEl) {
    totalHeight += presetRowEl.offsetHeight;
  }

  totalHeight += contentGap;

  if (outputSectionEl) {
    totalHeight += outputSectionEl.offsetHeight;
  }

  // 275px covers chrome not measured above (page header/footer, margins) —
  // presumably tuned by eye; TODO(review): confirm where 275 comes from.
  const availableHeight = viewportHeight - totalHeight - 275;

  // Floor of 200px; extra 20px breathing room below the input.
  const minHeight = 200;
  const maxHeight = availableHeight - 20;

  if (availableHeight > minHeight) {
    demoTextInput.style.height = `${Math.max(minHeight, maxHeight)}px`;
  } else {
    demoTextInput.style.height = `${minHeight}px`;
  }
}
| |
| |
// Size the input now and again whenever the window resizes.
calculateTextInputHeight();
window.addEventListener('resize', calculateTextInputHeight);

// Also re-run the calculation when any sibling section changes height.
const heightObserver = new ResizeObserver(() => {
  calculateTextInputHeight();
});

const headerWrapperEl = document.querySelector('.demo-header-wrapper');
const controlsEl = document.querySelector('.demo-controls');
const presetRowEl = document.querySelector('#presetControlsRow');
const outputSectionEl = document.querySelector('.demo-output-section');

if (headerWrapperEl) heightObserver.observe(headerWrapperEl);
if (controlsEl) heightObserver.observe(controlsEl);
if (presetRowEl) heightObserver.observe(presetRowEl);
if (outputSectionEl) heightObserver.observe(outputSectionEl);

// Show the custom scrollbar only while scrolling; hide it again 1.5 s after
// the last scroll event.
let scrollbarTimeout;
demoTextInput.addEventListener('scroll', () => {
  demoTextInput.classList.add('scrolling');

  // Restart the hide timer on every scroll tick.
  if (scrollbarTimeout) {
    clearTimeout(scrollbarTimeout);
  }

  scrollbarTimeout = setTimeout(() => {
    demoTextInput.classList.remove('scrolling');
  }, 1500);
});
| |
// Live input handler: refresh the char counter, switch to freeform mode on
// real user edits, and auto-detect the language while in freeform.
demoTextInput.addEventListener('input', () => {
  updateCharCounter();

  // Only genuine user edits (not programmatic preset swaps, which set
  // isPresetChanging) move the UI into freeform mode.
  const currentText = demoTextInput.textContent || demoTextInput.innerText || '';
  if (!isPresetChanging && currentText !== previousTextValue) {
    updateActiveButton('freeform');
  }

  if (currentPreset === 'freeform') {
    // Auto-switch the active language when the typed text clearly belongs
    // to a different one, and toast the user about the switch.
    const detectedLang = detectLanguage(currentText);
    if (detectedLang && detectedLang !== currentLanguage) {
      const previousLang = currentLanguage;
      currentLanguage = detectedLang;
      window.updateActiveLanguage(currentLanguage);
      showLanguageToast(previousLang, detectedLang);
    }
  }

  previousTextValue = currentText;
});

// Debounced counter refresh on resize (layout can change truncation).
let resizeTimeout;
window.addEventListener('resize', () => {
  clearTimeout(resizeTimeout);
  resizeTimeout = setTimeout(() => {
    updateCharCounter();
  }, 100);
});

// Initial counter state.
updateCharCounter();
|
|
| |
// Voice picker elements plus the "create voice" coming-soon modal.
const speakerList = document.getElementById('speakerList');
const speakerItems = speakerList ? speakerList.querySelectorAll('.speaker-item[data-voice]') : [];
const createVoiceBtn = document.getElementById('createVoiceBtn');
const comingSoonModal = document.getElementById('comingSoonModal');
const comingSoonCloseBtn = document.getElementById('comingSoonCloseBtn');
// Blocks re-entrant voice clicks while a voice switch is in flight.
let voiceSelectDisabled = false;
| |
| |
// Highlight the speaker item whose data-voice matches `voice`; clear the rest.
window.updateActiveSpeaker = function(voice) {
  if (!speakerList || !speakerItems) return;
  for (const item of speakerItems) {
    item.classList.toggle('active', item.dataset.voice === voice);
  }
};
| |
| |
// Reflect the initially selected voice in the picker UI.
if (speakerList && speakerItems.length > 0) {
  window.updateActiveSpeaker(currentVoice);
}

// Floating description bubble shown on hover / long-press of a voice item.
const speakerTooltip = document.getElementById('speakerTooltip');
| |
if (speakerList) {
  speakerItems.forEach(item => {
    // Set by the synthetic click dispatched from touchend so the click
    // handler below can tell deliberate taps from stray emulated clicks.
    let clickFromTouch = false;

    item.addEventListener('click', async (e) => {
      // On mobile touch devices, only accept clicks that came from our own
      // touchend dispatch (see the touch handlers below).
      if (isTouchDevice() && isMobileViewport() && !clickFromTouch) {
        return;
      }

      // Consume the flag so the next click is validated again.
      clickFromTouch = false;

      if (voiceSelectDisabled || modelsLoading || isGenerating) return;

      const selectedVoice = item.dataset.voice;

      // Re-clicking the active voice just regenerates with the current text.
      if (selectedVoice === currentVoice) {
        const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
        if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
          generateSpeech();
        }
        return;
      }

      // Lock the UI while the voice switch is in flight.
      const wasDisabled = demoGenerateBtn.disabled;
      demoGenerateBtn.disabled = true;
      voiceSelectDisabled = true;

      // Optimistically highlight the new voice.
      window.updateActiveSpeaker(selectedVoice);

      try {
        await switchVoice(selectedVoice);

        if (models && cfgs && processors) {
          demoGenerateBtn.disabled = false;
          voiceSelectDisabled = false;

          // Auto-regenerate when there is enough text to synthesize.
          const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
          if (text.length >= 10 && !isGenerating) {
            generateSpeech();
          }
        }
      } catch (error) {
        console.error('Failed to switch voice:', error);
        // Roll back the highlight and unlock the UI.
        window.updateActiveSpeaker(currentVoice);
        voiceSelectDisabled = false;
        if (!wasDisabled) demoGenerateBtn.disabled = false;
      }
    });

    // Tooltip + touch interaction wiring (only when the tooltip element exists).
    if (speakerTooltip) {
      // Desktop hover: show the voice description near the cursor.
      item.addEventListener('mouseenter', (e) => {
        if (isTouchDevice() && isMobileViewport()) return;
        const voice = item.dataset.voice;
        if (voice && VOICE_DESCRIPTIONS[voice]) {
          speakerTooltip.textContent = VOICE_DESCRIPTIONS[voice];
          speakerTooltip.style.display = 'block';
          updateTooltipPosition(e, speakerTooltip);
        }
      });

      // Keep the tooltip tracking the cursor while visible.
      item.addEventListener('mousemove', (e) => {
        if (isTouchDevice() && isMobileViewport()) return;
        if (speakerTooltip.style.display === 'block') {
          updateTooltipPosition(e, speakerTooltip);
        }
      });

      item.addEventListener('mouseleave', () => {
        if (isTouchDevice() && isMobileViewport()) return;
        speakerTooltip.style.display = 'none';
      });

      // Mobile: distinguish a quick tap (select voice) from a scroll/drag
      // (which only dismisses the tooltip).
      let touchStartTime = 0;
      let touchHandled = false;
      let touchStartY = 0;
      const TOUCH_MOVE_THRESHOLD = 10;

      item.addEventListener('touchstart', (e) => {
        if (!isTouchDevice() || !isMobileViewport()) return;

        touchHandled = false;
        const touch = e.touches[0];
        touchStartTime = Date.now();
        touchStartY = touch.clientY;

        const voice = item.dataset.voice;
        if (voice && VOICE_DESCRIPTIONS[voice]) {
          // Suppress the browser's emulated mouse events / scrolling.
          e.preventDefault();

          // Show the description immediately on finger-down.
          speakerTooltip.textContent = VOICE_DESCRIPTIONS[voice];
          speakerTooltip.style.display = 'block';
          updateTooltipPositionMobile(speakerTooltip, touch.clientY);
        }
      }, { passive: false });

      item.addEventListener('touchmove', (e) => {
        if (!isTouchDevice() || !isMobileViewport()) return;

        const touch = e.touches[0];
        const deltaY = Math.abs(touch.clientY - touchStartY);

        // Moved too far vertically: treat as a scroll, not a tap.
        if (deltaY > TOUCH_MOVE_THRESHOLD) {
          touchHandled = true;
          // Dismiss the tooltip once the gesture is clearly a scroll.
          speakerTooltip.style.display = 'none';
        }

        // passive:false lets us block native scrolling while over the item.
        e.preventDefault();
      }, { passive: false });

      item.addEventListener('touchend', (e) => {
        if (!isTouchDevice() || !isMobileViewport()) return;

        const touchEndTime = Date.now();
        const touchDuration = touchEndTime - touchStartTime;

        // Tooltip is only shown while the finger is down.
        speakerTooltip.style.display = 'none';

        // Block the browser's synthesized click.
        e.preventDefault();

        // Short, stationary touch = tap: replay it as a click that the
        // click handler above will accept (via clickFromTouch).
        if (!touchHandled && touchDuration < 500) {
          clickFromTouch = true;
          setTimeout(() => {
            const clickEvent = new MouseEvent('click', {
              bubbles: true,
              cancelable: true,
              view: window
            });
            item.dispatchEvent(clickEvent);
          }, 50);
        } else {
          // Long-press or scroll: swallow the gesture entirely.
          touchHandled = true;
          e.stopPropagation();
        }
      }, { passive: false });

      item.addEventListener('touchcancel', (e) => {
        if (!isTouchDevice() || !isMobileViewport()) return;

        // Gesture interrupted: hide the tooltip and mark it handled.
        speakerTooltip.style.display = 'none';
        touchHandled = true;

        e.preventDefault();
      }, { passive: false });

      // A long-press would otherwise open the native context menu on mobile.
      item.addEventListener('contextmenu', (e) => {
        if (isTouchDevice() && isMobileViewport()) {
          e.preventDefault();
          return false;
        }
      });
    }
  });
}
|
|
| |
// Position the hover tooltip near the cursor (40px above it), then nudge it
// back inside the viewport if any edge overflows after placement.
function updateTooltipPosition(event, tooltip) {
  // Initial placement relative to the cursor.
  tooltip.style.left = `${event.clientX}px`;
  tooltip.style.top = `${event.clientY - 40}px`;

  // Measure AFTER placement, then clamp each edge with a 10px margin.
  const rect = tooltip.getBoundingClientRect();
  const maxX = window.innerWidth;
  const maxY = window.innerHeight;

  if (rect.right > maxX) {
    tooltip.style.left = `${maxX - rect.width - 10}px`;
  }
  if (rect.left < 0) {
    tooltip.style.left = '10px';
  }
  if (rect.top < 0) {
    // No room above the cursor — flip below it instead.
    tooltip.style.top = `${event.clientY + 40}px`;
  }
  if (rect.bottom > maxY) {
    tooltip.style.top = `${maxY - rect.height - 10}px`;
  }
}
| |
| |
// Position the voice tooltip for touch devices: stretch it into a 90%-width
// banner, anchor it 75px above the touch point, then clamp it vertically
// inside the viewport (flipping below the finger when there is no room above).
// Fix: removed the unused `windowWidth` local (horizontal placement is fixed
// at left:5% / width:90%, so it was never read).
function updateTooltipPositionMobile(tooltip, touchY) {
  const windowHeight = window.innerHeight;

  // Mobile layout: full-width banner with centered, wrapping text.
  tooltip.style.width = '90%';
  tooltip.style.left = '5%';
  tooltip.style.right = 'auto';
  tooltip.style.marginLeft = '0';
  tooltip.style.marginRight = '0';
  tooltip.style.whiteSpace = 'normal';
  tooltip.style.textAlign = 'center';

  // Default placement: above the finger so it isn't hidden by the hand.
  const y = touchY - 75;
  tooltip.style.top = y + 'px';

  // Measure AFTER placement so the rect reflects the forced width/wrapping.
  const tooltipRect = tooltip.getBoundingClientRect();

  if (tooltipRect.top < 10) {
    // No room above the finger — flip below it.
    tooltip.style.top = (touchY + 20) + 'px';
  }
  if (tooltipRect.bottom > windowHeight - 10) {
    tooltip.style.top = (windowHeight - tooltipRect.height - 10) + 'px';
  }
}
| |
| |
// "Create voice" opens the coming-soon modal.
if (createVoiceBtn && comingSoonModal) {
  createVoiceBtn.addEventListener('click', () => {
    comingSoonModal.classList.add('show');
  });
}

// Close button dismisses the modal.
if (comingSoonCloseBtn && comingSoonModal) {
  comingSoonCloseBtn.addEventListener('click', () => {
    comingSoonModal.classList.remove('show');
  });
}

// Clicking the backdrop also dismisses the modal.
if (comingSoonModal) {
  const overlay = comingSoonModal.querySelector('.coming-soon-modal-overlay');
  if (overlay) {
    overlay.addEventListener('click', () => {
      comingSoonModal.classList.remove('show');
    });
  }
}
|
|
| |
// Language picker (reuses the .speaker-item styling with data-language keys).
const languageList = document.getElementById('languageList');
const languageItems = languageList ? languageList.querySelectorAll('.speaker-item[data-language]') : [];
| |
// Highlight the language item matching `language`; clear the rest.
window.updateActiveLanguage = function(language) {
  if (!languageList || !languageItems) return;
  for (const item of languageItems) {
    item.classList.toggle('active', item.dataset.language === language);
  }
};
| |
| |
// Reflect the initially selected language in the picker UI.
if (languageList && languageItems.length > 0) {
  window.updateActiveLanguage(currentLanguage);
}

// Language selection: switch language, re-localize the active preset text,
// and auto-regenerate when there is enough text.
if (languageList) {
  languageItems.forEach(item => {
    item.addEventListener('click', async (e) => {
      // Ignore clicks while models are loading or a generation is running.
      if (modelsLoading || isGenerating) return;

      const selectedLanguage = item.dataset.language;

      // Re-clicking the active language just regenerates with current text.
      if (selectedLanguage === currentLanguage) {
        const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
        if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
          generateSpeech();
        }
        return;
      }

      currentLanguage = selectedLanguage;
      window.updateActiveLanguage(currentLanguage);

      // If a preset (not freeform) is active and has a translation for the
      // new language, swap the input text without triggering freeform mode.
      if (currentPreset && currentPreset !== 'freeform' && presetTexts[currentPreset]) {
        const preset = presetTexts[currentPreset];
        if (preset && typeof preset === 'object' && preset[currentLanguage]) {
          isPresetChanging = true;
          demoTextInput.textContent = preset[currentLanguage];
          updateCharCounter();
          isPresetChanging = false;
        }
      }

      // Let the DOM settle before reading the (possibly replaced) text
      // and regenerating.
      await new Promise(resolve => setTimeout(resolve, 100));
      const text = (demoTextInput.textContent || demoTextInput.innerText || '').trim();
      if (text.length >= 10 && !isGenerating && models && cfgs && processors) {
        generateSpeech();
      }
    });
  });
}
|
|
| |
// Decorative title elements used for the letter-by-letter animations below.
const demoTitleLeft = document.querySelector('.demo-title-left');
const demoTitleRight = document.querySelector('.demo-title-right');
const demoOutputSection = document.querySelector('.demo-output-section');

// Wrap each character of the left title in a span so letters can animate
// individually. Text comes from the page's own markup, so innerHTML here is
// author-controlled. NOTE(review): the space replacement looks like it should
// be a non-breaking-space entity — confirm against the deployed source.
if (demoTitleLeft) {
  const text = demoTitleLeft.textContent.trim();
  demoTitleLeft.innerHTML = text.split('').map(char =>
    char === ' ' ? ' ' : `<span class="letter visible">${char}</span>`
  ).join('');
}
|
|
| |
// Clicking the input section replays the left title's typewriter effect:
// hide all letters, then reveal them one-by-one at 62.5 ms intervals.
if (demoInputSection && demoTitleLeft) {
  demoInputSection.addEventListener('click', () => {
    const letters = demoTitleLeft.querySelectorAll('.letter');

    letters.forEach(letter => {
      letter.classList.remove('visible');
    });

    // Staggered reveal, one letter per tick.
    letters.forEach((letter, index) => {
      setTimeout(() => {
        letter.classList.add('visible');
      }, index * 0.0625 * 1000);
    });
  });
}

// Clicking the output section (except the Generate button) replays the
// right title's speech animation.
if (demoOutputSection && demoTitleRight) {
  demoOutputSection.addEventListener('click', (event) => {
    if (event.target.closest('#demoGenerateBtn')) {
      return;
    }
    demoTitleRight.classList.remove('animate-speech');
    // Force a reflow so re-adding the class restarts the CSS animation.
    void demoTitleRight.offsetWidth;
    demoTitleRight.classList.add('animate-speech');
  });
}
|
|
| |
// Kick off ONNX model download/initialization once all UI wiring is done.
initializeModels();
| })(); |
|
|