Spaces:

jscmp4
/

webgpu-smoke-test

Running

App Files Files Community

jscmp4 committed on Dec 10, 2025

Commit

55bd7f9

verified ·

1 Parent(s): 9aa9904

MVP with auto language detect

Browse files

Files changed (1) hide show

index.html +47 -44

index.html CHANGED Viewed

@@ -3,24 +3,26 @@
 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>Web AI - MP3 转文字 (MVP版)</title>
     <style>
         body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; max-width: 800px; margin: 2rem auto; padding: 0 1rem; color: #333; }
         h1 { border-bottom: 2px solid #eee; padding-bottom: 10px; }
-        /* 布局容器 */
         .container { background: #f9f9f9; padding: 20px; border-radius: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }
-        /* 控件样式 */
         .controls { margin: 20px 0; display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
         input[type="file"] { padding: 10px; border: 1px solid #ddd; border-radius: 6px; background: white; }
         button { background: #000; color: #fff; border: none; padding: 10px 20px; border-radius: 6px; cursor: pointer; font-weight: bold; transition: opacity 0.2s; }
         button:disabled { background: #ccc; cursor: not-allowed; }
         button:hover:not(:disabled) { opacity: 0.8; }
-        /* 状态和结果区域 */
         #status { color: #666; margin-bottom: 10px; font-size: 0.9em; }
-        #audio-player { width: 100%; margin: 10px 0; display: none; } /* 默认隐藏播放器 */
         #result-area {
             width: 100%; height: 200px;
@@ -32,13 +34,20 @@
 </head>
 <body>
-    <h1>🎙️ 本地 MP3 转文字 (Whisper)</h1>
-    <p>选择一个音频文件，利用你浏览器的算力将其转换为文字。</p>
     <div class="container">
         <div id="status">🔵 正在初始化引擎...</div>
         <div class="controls">
             <input type="file" id="file-upload" accept="audio/*,video/*">
             <button id="run-btn" disabled>开始转换</button>
         </div>
@@ -52,91 +61,85 @@
     <script type="module">
         import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
-        // 允许本地缓存模型，下次刷新不用重新下载
         env.allowLocalModels = false;
         env.useBrowserCache = true;
-        // 获取 DOM 元素
         const statusEl = document.getElementById('status');
         const fileInput = document.getElementById('file-upload');
         const runBtn = document.getElementById('run-btn');
         const audioPlayer = document.getElementById('audio-player');
         const resultArea = document.getElementById('result-area');
         let transcriber = null;
-        // --- 核心步骤 1: 加载模型 ---
         async function initModel() {
-            statusEl.innerText = "⏳ 正在加载 Whisper 模型 (首次需下载 ~40MB)...";
             try {
-                // 使用 whisper-tiny 模型 (速度最快)
-                // 如果想要更高精度，把下面改成 'Xenova/whisper-base'
                 transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny');
-                statusEl.innerText = "✅ 模型就绪！请上传音频。";
-                runBtn.disabled = false; // 只有模型加载完了，按钮才能点
             } catch (err) {
                 statusEl.innerText = "❌ 模型加载失败: " + err.message;
             }
         }
-        // --- 核心步骤 2: 处理文件上传 ---
         fileInput.addEventListener('change', (e) => {
             const file = e.target.files[0];
             if (!file) return;
-            // 创建一个临时的 URL 让播放器能播放
             const url = URL.createObjectURL(file);
             audioPlayer.src = url;
-            audioPlayer.style.display = 'block'; // 显示播放器
-            resultArea.value = ""; // 清空上次结果
-            statusEl.innerText = "📂 文件已就绪，点击“开始转换”";
         });
-        // --- 核心步骤 3: 执行转换 ---
         runBtn.addEventListener('click', async () => {
             const file = fileInput.files[0];
             if (!file) { alert("请先选择文件！"); return; }
-            // UI 状态更新
             runBtn.disabled = true;
-            statusEl.innerText = "🚀 正在转换中... (长音频请耐心等待)";
             const startTime = performance.now();
             try {
-                // 将文件转为 Blob URL
                 const url = URL.createObjectURL(file);
-                // === 关键逻辑 ===
-                // 调用模型进行推理
-                const output = await transcriber(url, {
-                    chunk_length_s: 30, // 关键：每30秒切一片，处理长音频必备
-                    stride_length_s: 5, // 切片重叠长度，防止切断句子
-                    language: 'chinese', // 强制中文模式 (如果全是英文可以改成 english)
-                    task: 'transcribe',  // 任务类型：转录
-                });
-                // ===============
                 const endTime = performance.now();
                 const timeCost = ((endTime - startTime) / 1000).toFixed(2);
-                // 显示结果
                 resultArea.value = output.text;
-                statusEl.innerText = `✅ 转换完成！耗时: ${timeCost}秒`;
             } catch (err) {
                 console.error(err);
-                statusEl.innerText = "❌ 转换出错，请查看控制台(F12)";
-                resultArea.value = "错误详情:\n" + err.message;
             } finally {
-                runBtn.disabled = false; // 恢复按钮
             }
         });
-        // 页面加载时自动启动模型下载
         initModel();
     </script>
 </body>
 </html>

 <head>
     <meta charset="UTF-8">
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Web AI - 多语言语音转文字</title>
     <style>
         body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; max-width: 800px; margin: 2rem auto; padding: 0 1rem; color: #333; }
         h1 { border-bottom: 2px solid #eee; padding-bottom: 10px; }
         .container { background: #f9f9f9; padding: 20px; border-radius: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }
+        /* 控件布局优化 */
         .controls { margin: 20px 0; display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
+        /* 下拉菜单样式 */
+        select { padding: 10px; border: 1px solid #ddd; border-radius: 6px; background: white; cursor: pointer; }
         input[type="file"] { padding: 10px; border: 1px solid #ddd; border-radius: 6px; background: white; }
         button { background: #000; color: #fff; border: none; padding: 10px 20px; border-radius: 6px; cursor: pointer; font-weight: bold; transition: opacity 0.2s; }
         button:disabled { background: #ccc; cursor: not-allowed; }
         button:hover:not(:disabled) { opacity: 0.8; }
         #status { color: #666; margin-bottom: 10px; font-size: 0.9em; }
+        #audio-player { width: 100%; margin: 10px 0; display: none; }
         #result-area {
             width: 100%; height: 200px;
 </head>
 <body>
+    <h1>🎙️ 本地语音转文字 (多语言版)</h1>
+    <p>支持中文、英文自动识别，或手动指定语言。</p>
     <div class="container">
         <div id="status">🔵 正在初始化引擎...</div>
         <div class="controls">
+            <select id="language-select">
+                <option value="auto">🌐 自动识别 (Auto)</option>
+                <option value="chinese">🇨🇳 中文 (Chinese)</option>
+                <option value="english">🇺🇸 英文 (English)</option>
+                <option value="japanese">🇯🇵 日文 (Japanese)</option>
+                </select>
             <input type="file" id="file-upload" accept="audio/*,video/*">
             <button id="run-btn" disabled>开始转换</button>
         </div>
     <script type="module">
         import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
         // Transformers.js environment: do not look for model files on a local
         // path (models are fetched remotely instead) ...
         env.allowLocalModels = false;
         // ... and let the library keep downloaded model files in the browser
         // cache so a page reload does not have to download them again.
         env.useBrowserCache = true;
         // DOM handles used by the handlers below.
         // NOTE(review): the #audio-player and #result-area elements are not in
         // the visible markup of this view; confirm they exist in the page body.
         const statusEl = document.getElementById('status');
         const fileInput = document.getElementById('file-upload');
         const runBtn = document.getElementById('run-btn');
         const audioPlayer = document.getElementById('audio-player');
         const resultArea = document.getElementById('result-area');
+        const langSelect = document.getElementById('language-select'); // language dropdown
         // Speech-recognition pipeline; stays null until initModel() has loaded it.
         let transcriber = null;
         /**
          * Loads the Whisper speech-recognition pipeline and stores it in
          * `transcriber`. Progress and failures are reported through the status
          * line; the run button is enabled only after a successful load.
          *
          * @returns {Promise<void>} Resolves once loading has succeeded or failed;
          *     load errors are shown in the status line rather than rethrown.
          */
         async function initModel() {
             const asrTask = 'automatic-speech-recognition';
             // whisper-tiny is kept as the model; the original author notes it is multilingual.
             const modelId = 'Xenova/whisper-tiny';

             statusEl.innerText = "⏳ 正在加载 Whisper 模型...";
             try {
                 transcriber = await pipeline(asrTask, modelId);
                 runBtn.disabled = false;
                 statusEl.innerText = "✅ 模型就绪！";
             } catch (loadError) {
                 statusEl.innerText = `❌ 模型加载失败: ${loadError.message}`;
             }
         }
         // Object URL currently backing the preview player, tracked so it can be
         // released when the user picks a different file.
         let previewObjectUrl = null;

         // When a file is chosen: point the preview player at it, reveal the
         // player, clear the previous transcript and update the status line.
         fileInput.addEventListener('change', (e) => {
             const file = e.target.files[0];
             // The picker was cancelled or cleared: keep the current state.
             if (!file) return;

             const staleUrl = previewObjectUrl;
             previewObjectUrl = URL.createObjectURL(file);
             audioPlayer.src = previewObjectUrl;
             // Release the previous blob URL only after the player has switched
             // away from it; otherwise every new selection leaks one.
             if (staleUrl !== null) {
                 URL.revokeObjectURL(staleUrl);
             }

             audioPlayer.style.display = 'block';
             resultArea.value = "";
             statusEl.innerText = "📂 文件就绪";
         });
         // Transcribe the currently selected file when the run button is clicked.
         // The button is disabled for the duration of the run and re-enabled in
         // `finally`, whether the transcription succeeded or failed.
         runBtn.addEventListener('click', async () => {
             const file = fileInput.files[0];
             if (!file) { alert("请先选择文件！"); return; }

             runBtn.disabled = true;
             statusEl.innerText = "🚀 正在转换中...";
             const startTime = performance.now();

             // Declared outside `try` so `finally` can release it on every path.
             let url = null;
             try {
                 url = URL.createObjectURL(file);

                 // Language chosen in the dropdown; "auto" means no language is forced.
                 const selectedLang = langSelect.value;

                 // Inference options: 30-second chunks with a 5-second stride
                 // between them, transcription task.
                 const options = {
                     chunk_length_s: 30,
                     stride_length_s: 5,
                     task: 'transcribe',
                 };
                 // Only pin the language when the user picked one explicitly.
                 if (selectedLang !== 'auto') {
                     options.language = selectedLang;
                 }

                 const output = await transcriber(url, options);

                 const endTime = performance.now();
                 const timeCost = ((endTime - startTime) / 1000).toFixed(2);
                 resultArea.value = output.text;
                 statusEl.innerText = `✅ 完成！耗时: ${timeCost}秒 (语言模式: ${selectedLang})`;
             } catch (err) {
                 console.error(err);
                 statusEl.innerText = "❌ 出错: " + err.message;
             } finally {
                 // The transcriber call has settled by now, so the temporary blob
                 // URL is no longer needed; without this every run leaks one.
                 if (url !== null) {
                     URL.revokeObjectURL(url);
                 }
                 runBtn.disabled = false;
             }
         });
         initModel();
     </script>
 </body>
 </html>