Spaces:

techfreakworm
/

chatterbox-voice-studio

Running

techfreakworm committed 22 days ago

Commit

5d81907

unverified ·

1 Parent(s): 8122b04

feat(dialog): parse_dialog with SPEAKER A-D regex and edge-case handling

Files changed (2) hide show

server/dialog.py ADDED Viewed

+"""Dialog mode: parse SPEAKER X: scripts into ordered turns and stitch
+per-turn outputs into a single concatenated WAV.
+Generator is in this same file but added in Task 12.
+"""
+from __future__ import annotations
+import re
+from dataclasses import dataclass
+# Matches one speaker tag at the start of a line (re.MULTILINE makes ^ match
+# at every line start): optional leading whitespace, the literal "SPEAKER",
+# whitespace, a single letter A-D (captured as group 1), then a colon with
+# optional whitespace on either side.
+# NOTE: \s also matches newlines, so a match may absorb blank lines before the
+# tag and any whitespace/newlines that follow the colon.
+_SPEAKER_RE = re.compile(r"^\s*SPEAKER\s+([A-D])\s*:\s*", re.MULTILINE)
+@dataclass(frozen=True)
+class DialogTurn:
+    """One speaker turn of a dialog script.
+
+    Declared as a frozen dataclass, so instances are immutable and compare
+    by field value.
+    """
+    speaker: str   # "A" | "B" | "C" | "D"
+    text: str      # turn body; parse_dialog stores it with outer whitespace stripped
+class DialogParseError(ValueError):
+    """Raised when a dialog script can't be parsed into turns.
+
+    parse_dialog raises it when the script contains no SPEAKER tag at all,
+    or when every tagged turn is empty. It subclasses ValueError, so callers
+    catching ValueError also catch it.
+    """
+def parse_dialog(text: str) -> list[DialogTurn]:
+    matches = list(_SPEAKER_RE.finditer(text))
+    if not matches:
+        raise DialogParseError(
+            "Use SPEAKER A: ... / SPEAKER B: ... lines to define turns."
+        )
+    turns: list[DialogTurn] = []
+    for i, m in enumerate(matches):
+        start = m.end()
+        end = matches[i + 1].start() if i + 1 < len(matches) else len(text)
+        block = text[start:end].strip()
+        if block:
+            turns.append(DialogTurn(speaker=m.group(1), text=block))
+    if not turns:
+        raise DialogParseError("No non-empty speaker turns found.")
+    return turns

tests/test_dialog_parser.py ADDED Viewed

+import pytest
+from server.dialog import DialogParseError, DialogTurn, parse_dialog
+def test_simple_a_b_alternation():
+    text = "SPEAKER A: hi\nSPEAKER B: hello"
+    turns = parse_dialog(text)
+    assert turns == [
+        DialogTurn(speaker="A", text="hi"),
+        DialogTurn(speaker="B", text="hello"),
+    ]
+def test_multi_line_turn():
+    text = "SPEAKER A: line one\nstill A\nSPEAKER B: end."
+    turns = parse_dialog(text)
+    assert turns[0].speaker == "A"
+    assert turns[0].text == "line one\nstill A"
+    assert turns[1].speaker == "B"
+    assert turns[1].text == "end."
+def test_leading_whitespace_tolerated():
+    text = "   SPEAKER A: hi\n   SPEAKER B: hello"
+    turns = parse_dialog(text)
+    assert [t.speaker for t in turns] == ["A", "B"]
+def test_missing_prefix_raises():
+    with pytest.raises(DialogParseError):
+        parse_dialog("plain text with no speakers")
+def test_unknown_letter_is_ignored_so_no_match_raises():
+    # "SPEAKER E: ..." doesn't match the regex -> treated as no tags.
+    with pytest.raises(DialogParseError):
+        parse_dialog("SPEAKER E: nope")
+def test_three_consecutive_a_turns():
+    text = "SPEAKER A: one\nSPEAKER A: two\nSPEAKER A: three"
+    turns = parse_dialog(text)
+    assert [t.text for t in turns] == ["one", "two", "three"]
+def test_empty_turn_is_dropped():
+    text = "SPEAKER A: hi\nSPEAKER B:\nSPEAKER C: bye"
+    turns = parse_dialog(text)
+    assert [t.speaker for t in turns] == ["A", "C"]