jscmp4 committed on
Commit
55bd7f9
·
verified ·
1 Parent(s): 9aa9904

MVP with auto language detection

Browse files
Files changed (1) hide show
  1. index.html +47 -44
index.html CHANGED
@@ -3,24 +3,26 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Web AI - MP3 转文字 (MVP版)</title>
7
  <style>
8
  body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; max-width: 800px; margin: 2rem auto; padding: 0 1rem; color: #333; }
9
  h1 { border-bottom: 2px solid #eee; padding-bottom: 10px; }
10
 
11
- /* 布局容器 */
12
  .container { background: #f9f9f9; padding: 20px; border-radius: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }
13
 
14
- /* 控件样式 */
15
  .controls { margin: 20px 0; display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
 
 
 
 
16
  input[type="file"] { padding: 10px; border: 1px solid #ddd; border-radius: 6px; background: white; }
17
  button { background: #000; color: #fff; border: none; padding: 10px 20px; border-radius: 6px; cursor: pointer; font-weight: bold; transition: opacity 0.2s; }
18
  button:disabled { background: #ccc; cursor: not-allowed; }
19
  button:hover:not(:disabled) { opacity: 0.8; }
20
 
21
- /* 状态和结果区域 */
22
  #status { color: #666; margin-bottom: 10px; font-size: 0.9em; }
23
- #audio-player { width: 100%; margin: 10px 0; display: none; } /* 默认隐藏播放器 */
24
 
25
  #result-area {
26
  width: 100%; height: 200px;
@@ -32,13 +34,20 @@
32
  </head>
33
  <body>
34
 
35
- <h1>🎙️ 本地 MP3 转文字 (Whisper)</h1>
36
- <p>选择一个音频件,利用你浏览器的算力将其转换为。</p>
37
 
38
  <div class="container">
39
  <div id="status">🔵 正在初始化引擎...</div>
40
 
41
  <div class="controls">
 
 
 
 
 
 
 
42
  <input type="file" id="file-upload" accept="audio/*,video/*">
43
  <button id="run-btn" disabled>开始转换</button>
44
  </div>
@@ -52,91 +61,85 @@
52
  <script type="module">
53
  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
54
 
55
- // 允许本地缓存模型,下次刷新不用重新下载
56
  env.allowLocalModels = false;
57
  env.useBrowserCache = true;
58
 
59
- // 获取 DOM 元素
60
  const statusEl = document.getElementById('status');
61
  const fileInput = document.getElementById('file-upload');
62
  const runBtn = document.getElementById('run-btn');
63
  const audioPlayer = document.getElementById('audio-player');
64
  const resultArea = document.getElementById('result-area');
 
65
 
66
  let transcriber = null;
67
 
68
- // --- 核心步骤 1: 加载模型 ---
69
  async function initModel() {
70
- statusEl.innerText = "⏳ 正在加载 Whisper 模型 (首次需下载 ~40MB)...";
71
  try {
72
- // 使用 whisper-tiny 模型 (速度最快)
73
- // 如果想要更高精度,把下面改成 'Xenova/whisper-base'
74
  transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny');
75
-
76
- statusEl.innerText = "✅ 模型就绪!请上传音频。";
77
- runBtn.disabled = false; // 只有模型加载完了,按钮才能点
78
  } catch (err) {
79
  statusEl.innerText = "❌ 模型加载失败: " + err.message;
80
  }
81
  }
82
 
83
- // --- 核心步骤 2: 处理文件上传 ---
84
  fileInput.addEventListener('change', (e) => {
85
  const file = e.target.files[0];
86
  if (!file) return;
87
-
88
- // 创建一个临时的 URL 让播放器能播放
89
  const url = URL.createObjectURL(file);
90
  audioPlayer.src = url;
91
- audioPlayer.style.display = 'block'; // 显示播放器
92
-
93
- resultArea.value = ""; // 清空上次结果
94
- statusEl.innerText = "📂 文件已就绪,点击“开始转换”";
95
  });
96
 
97
- // --- 核心步骤 3: 执行转换 ---
98
  runBtn.addEventListener('click', async () => {
99
  const file = fileInput.files[0];
100
  if (!file) { alert("请先选择文件!"); return; }
101
 
102
- // UI 状态更新
103
  runBtn.disabled = true;
104
- statusEl.innerText = "🚀 正在转换中... (长音频请耐心等待)";
105
  const startTime = performance.now();
106
 
107
  try {
108
- // 将文件转为 Blob URL
109
  const url = URL.createObjectURL(file);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
 
111
- // === 关键逻辑 ===
112
- // 调用模型进行推理
113
- const output = await transcriber(url, {
114
- chunk_length_s: 30, // 关键:每30秒切一片,处理长音频必备
115
- stride_length_s: 5, // 切片重叠长度,防止切断句子
116
- language: 'chinese', // 强制中文模式 (如果全是英文可以改成 english)
117
- task: 'transcribe', // 任务类型:转录
118
- });
119
- // ===============
120
 
121
  const endTime = performance.now();
122
  const timeCost = ((endTime - startTime) / 1000).toFixed(2);
123
-
124
- // 显示结果
125
  resultArea.value = output.text;
126
- statusEl.innerText = `✅ 转换完成!耗时: ${timeCost}秒`;
127
 
128
  } catch (err) {
129
  console.error(err);
130
- statusEl.innerText = "❌ 转换出错,请查看控制台(F12)";
131
- resultArea.value = "错误详情:\n" + err.message;
132
  } finally {
133
- runBtn.disabled = false; // 恢复按钮
134
  }
135
  });
136
 
137
- // 页面加载时自动启动模型下载
138
  initModel();
139
-
140
  </script>
141
  </body>
142
  </html>
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Web AI - 多语言语音转文字</title>
7
  <style>
8
  body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif; max-width: 800px; margin: 2rem auto; padding: 0 1rem; color: #333; }
9
  h1 { border-bottom: 2px solid #eee; padding-bottom: 10px; }
10
 
 
11
  .container { background: #f9f9f9; padding: 20px; border-radius: 12px; box-shadow: 0 2px 10px rgba(0,0,0,0.05); }
12
 
13
+ /* 控件布局优化 */
14
  .controls { margin: 20px 0; display: flex; gap: 10px; flex-wrap: wrap; align-items: center; }
15
+
16
+ /* 下拉菜单样式 */
17
+ select { padding: 10px; border: 1px solid #ddd; border-radius: 6px; background: white; cursor: pointer; }
18
+
19
  input[type="file"] { padding: 10px; border: 1px solid #ddd; border-radius: 6px; background: white; }
20
  button { background: #000; color: #fff; border: none; padding: 10px 20px; border-radius: 6px; cursor: pointer; font-weight: bold; transition: opacity 0.2s; }
21
  button:disabled { background: #ccc; cursor: not-allowed; }
22
  button:hover:not(:disabled) { opacity: 0.8; }
23
 
 
24
  #status { color: #666; margin-bottom: 10px; font-size: 0.9em; }
25
+ #audio-player { width: 100%; margin: 10px 0; display: none; }
26
 
27
  #result-area {
28
  width: 100%; height: 200px;
 
34
  </head>
35
  <body>
36
 
37
+ <h1>🎙️ 本地语音转文字 (多语言版)</h1>
38
+ <p>支持中、英自动识别,或手动指定语言。</p>
39
 
40
  <div class="container">
41
  <div id="status">🔵 正在初始化引擎...</div>
42
 
43
  <div class="controls">
44
+ <select id="language-select">
45
+ <option value="auto">🌐 自动识别 (Auto)</option>
46
+ <option value="chinese">🇨🇳 中文 (Chinese)</option>
47
+ <option value="english">🇺🇸 英文 (English)</option>
48
+ <option value="japanese">🇯🇵 日文 (Japanese)</option>
49
+ </select>
50
+
51
  <input type="file" id="file-upload" accept="audio/*,video/*">
52
  <button id="run-btn" disabled>开始转换</button>
53
  </div>
 
61
  <script type="module">
62
  import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2';
63
 
 
64
  env.allowLocalModels = false;
65
  env.useBrowserCache = true;
66
 
 
67
  const statusEl = document.getElementById('status');
68
  const fileInput = document.getElementById('file-upload');
69
  const runBtn = document.getElementById('run-btn');
70
  const audioPlayer = document.getElementById('audio-player');
71
  const resultArea = document.getElementById('result-area');
72
+ const langSelect = document.getElementById('language-select'); // 获取下拉菜单
73
 
74
  let transcriber = null;
75
 
 
76
  async function initModel() {
77
+ statusEl.innerText = "⏳ 正在加载 Whisper 模型...";
78
  try {
79
+ // 依然使用 tiny 模型,它本身就是 Multilingual 的
 
80
  transcriber = await pipeline('automatic-speech-recognition', 'Xenova/whisper-tiny');
81
+ statusEl.innerText = "✅ 模型就绪!";
82
+ runBtn.disabled = false;
 
83
  } catch (err) {
84
  statusEl.innerText = "❌ 模型加载失败: " + err.message;
85
  }
86
  }
87
 
 
88
  fileInput.addEventListener('change', (e) => {
89
  const file = e.target.files[0];
90
  if (!file) return;
 
 
91
  const url = URL.createObjectURL(file);
92
  audioPlayer.src = url;
93
+ audioPlayer.style.display = 'block';
94
+ resultArea.value = "";
95
+ statusEl.innerText = "📂 文件就绪";
 
96
  });
97
 
 
98
  runBtn.addEventListener('click', async () => {
99
  const file = fileInput.files[0];
100
  if (!file) { alert("请先选择文件!"); return; }
101
 
 
102
  runBtn.disabled = true;
103
+ statusEl.innerText = "🚀 正在转换中...";
104
  const startTime = performance.now();
105
 
106
  try {
 
107
  const url = URL.createObjectURL(file);
108
+
109
+ // --- 关键修改点 ---
110
+ // 获取用户选择的语言
111
+ const selectedLang = langSelect.value;
112
+
113
+ // 配置推理参数
114
+ let options = {
115
+ chunk_length_s: 30,
116
+ stride_length_s: 5,
117
+ task: 'transcribe',
118
+ };
119
+
120
+ // 只有当用户没有选 "auto" 时,才强制指定语言
121
+ if (selectedLang !== 'auto') {
122
+ options.language = selectedLang;
123
+ }
124
+ // ------------------
125
 
126
+ const output = await transcriber(url, options);
 
 
 
 
 
 
 
 
127
 
128
  const endTime = performance.now();
129
  const timeCost = ((endTime - startTime) / 1000).toFixed(2);
130
+
 
131
  resultArea.value = output.text;
132
+ statusEl.innerText = `✅ 完成!耗时: ${timeCost}秒 (语言模式: ${selectedLang})`;
133
 
134
  } catch (err) {
135
  console.error(err);
136
+ statusEl.innerText = "❌ 出错: " + err.message;
 
137
  } finally {
138
+ runBtn.disabled = false;
139
  }
140
  });
141
 
 
142
  initModel();
 
143
  </script>
144
  </body>
145
  </html>