techfreakworm committed on
Commit
ded73f2
·
unverified ·
1 Parent(s): 108b9f8

fix: re-encode reference voices as PCM WAV in browser; expand turbo tag list to 9

Browse files
server/models/chatterbox_turbo.py CHANGED
@@ -13,10 +13,21 @@ class Adapter:
13
  id: ClassVar[str] = "chatterbox-turbo"
14
  label: ClassVar[str] = "Chatterbox Turbo"
15
  description: ClassVar[str] = (
16
- "Faster, lower-VRAM English variant. Supports [laugh], [cough], [chuckle] tags."
 
17
  )
18
  languages: ClassVar[list[Lang]] = [Lang(code="en", label="English")]
19
- paralinguistic_tags: ClassVar[list[str]] = ["[laugh]", "[cough]", "[chuckle]"]
 
 
 
 
 
 
 
 
 
 
20
  supports_voice_clone: ClassVar[bool] = True
21
  params: ClassVar[list[ParamSpec]] = [
22
  ParamSpec(name="cfg_weight", label="CFG weight", type="float",
 
13
  id: ClassVar[str] = "chatterbox-turbo"
14
  label: ClassVar[str] = "Chatterbox Turbo"
15
  description: ClassVar[str] = (
16
+ "Faster, lower-VRAM English variant. Supports event tags: "
17
+ "[laugh] [chuckle] [sigh] [gasp] [cough] [sniff] [groan] [clear throat] [shush]."
18
  )
19
  languages: ClassVar[list[Lang]] = [Lang(code="en", label="English")]
20
+ paralinguistic_tags: ClassVar[list[str]] = [
21
+ "[laugh]",
22
+ "[chuckle]",
23
+ "[sigh]",
24
+ "[gasp]",
25
+ "[cough]",
26
+ "[sniff]",
27
+ "[groan]",
28
+ "[clear throat]",
29
+ "[shush]",
30
+ ]
31
  supports_voice_clone: ClassVar[bool] = True
32
  params: ClassVar[list[ParamSpec]] = [
33
  ParamSpec(name="cfg_weight", label="CFG weight", type="float",
web/src/components/VoiceComposer.tsx CHANGED
@@ -1,6 +1,7 @@
1
  import { useRef, useState } from "react";
2
  import { Recorder } from "@/lib/audio";
3
  import { addVoice } from "@/lib/idb";
 
4
 
5
  type Props = {
6
  onSaved: () => void;
@@ -16,9 +17,11 @@ export default function VoiceComposer({ onSaved }: Props) {
16
  const arr = new Uint8Array(await blob.arrayBuffer());
17
  const ctx = new AudioContext();
18
  const buf = await ctx.decodeAudioData(arr.buffer.slice(0));
 
 
19
  await addVoice({
20
  name: name || defaultName || `voice-${Date.now()}`,
21
- blob,
22
  sampleRate: buf.sampleRate,
23
  durationMs: Math.round(buf.duration * 1000),
24
  });
 
1
  import { useRef, useState } from "react";
2
  import { Recorder } from "@/lib/audio";
3
  import { addVoice } from "@/lib/idb";
4
+ import { encodeWav } from "@/lib/wav";
5
 
6
  type Props = {
7
  onSaved: () => void;
 
17
  const arr = new Uint8Array(await blob.arrayBuffer());
18
  const ctx = new AudioContext();
19
  const buf = await ctx.decodeAudioData(arr.buffer.slice(0));
20
+ // Re-encode as 16-bit PCM mono WAV so the server (libsndfile) can decode it.
21
+ const wav = encodeWav(buf);
22
  await addVoice({
23
  name: name || defaultName || `voice-${Date.now()}`,
24
+ blob: wav,
25
  sampleRate: buf.sampleRate,
26
  durationMs: Math.round(buf.duration * 1000),
27
  });
web/src/lib/wav.ts ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/**
 * Encode a decoded AudioBuffer as a 16-bit PCM mono WAV Blob.
 *
 * Used at upload/record time so the server (libsndfile) can always decode it.
 * Every channel of the input contributes equally to the mono mixdown, so
 * surround sources (more than two channels) are not silently truncated to
 * their first two channels.
 *
 * @param buf - Decoded audio; only `sampleRate`, `length`, `numberOfChannels`
 *   and `getChannelData` are read.
 * @returns A Blob of type `audio/wav` holding a 44-byte RIFF/WAVE header
 *   followed by `buf.length` little-endian signed 16-bit samples.
 */
export function encodeWav(buf: AudioBuffer): Blob {
  const HEADER_SIZE = 44;
  const BYTES_PER_SAMPLE = 2;
  const OUT_CHANNELS = 1;

  const sampleRate = buf.sampleRate;
  const frameCount = buf.length;

  // Fetch each channel once up front; getChannelData is not free to call per sample.
  const channels = Array.from({ length: buf.numberOfChannels }, (_, c) =>
    buf.getChannelData(c),
  );

  // Mix down to mono by averaging all channels. The intermediate is kept in
  // float32 so mono and stereo inputs quantise exactly as they always have.
  const mono = new Float32Array(frameCount);
  if (channels.length > 0) {
    for (let i = 0; i < frameCount; i++) {
      let sum = 0;
      for (const ch of channels) sum += ch[i];
      mono[i] = sum / channels.length;
    }
  }

  const blockAlign = OUT_CHANNELS * BYTES_PER_SAMPLE;
  const dataSize = frameCount * blockAlign;
  const out = new ArrayBuffer(HEADER_SIZE + dataSize);
  const view = new DataView(out);

  // Sequential little-endian writers sharing one cursor.
  let pos = 0;
  const writeStr = (s: string): void => {
    for (let i = 0; i < s.length; i++) view.setUint8(pos++, s.charCodeAt(i));
  };
  const writeU32 = (n: number): void => {
    view.setUint32(pos, n, true);
    pos += 4;
  };
  const writeU16 = (n: number): void => {
    view.setUint16(pos, n, true);
    pos += 2;
  };

  // RIFF container header.
  writeStr("RIFF");
  writeU32(HEADER_SIZE - 8 + dataSize); // bytes remaining after this field
  writeStr("WAVE");

  // "fmt " chunk describing uncompressed PCM.
  writeStr("fmt ");
  writeU32(16); // fmt chunk payload size
  writeU16(1); // audio format: PCM
  writeU16(OUT_CHANNELS);
  writeU32(sampleRate);
  writeU32(sampleRate * blockAlign); // byte rate
  writeU16(blockAlign);
  writeU16(BYTES_PER_SAMPLE * 8); // bits per sample

  // "data" chunk header.
  writeStr("data");
  writeU32(dataSize);

  // Clamp to [-1, 1] and scale to int16. The negative side scales by 0x8000
  // and the positive side by 0x7fff so both extremes map onto the int16 limits.
  for (let i = 0; i < frameCount; i++) {
    const s = Math.max(-1, Math.min(1, mono[i]));
    view.setInt16(
      HEADER_SIZE + i * BYTES_PER_SAMPLE,
      s < 0 ? s * 0x8000 : s * 0x7fff,
      true,
    );
  }

  return new Blob([out], { type: "audio/wav" });
}