"""
services/streaming.py — Production-grade parallel TTS streamer

FIX-ISSUE4 (Natural, slow, small-chunk TTS):
  The previous code used character-count thresholds that produced large
  sentence-level chunks (25–65 chars), causing buffered, robotic-feeling
  speech with a burst of audio at once.

  New behaviour:
    • Flush at word boundaries (2–3 words) for voice-like pacing.
    • Flush threshold is ~10 chars first chunk, ~18 chars subsequent (8 / 14
      with ElevenLabs) — which corresponds to roughly 2–3 average
      Bengali/English words.
    • Hard limit of 30 chars (first chunk) / 40 chars (subsequent), or 18 / 28
      with ElevenLabs, ensures no chunk ever gets too large.
    • Sentence-ending punctuation (।.!?) flushes as soon as the minimum
      flush threshold is reached, giving natural pause points.
    • The TTS rate is slightly faster than neutral in tts.py for a more
      conversational pace.

  Result: audio arrives in small, fast, overlapping synthesis tasks,
  giving a low-latency, smooth, natural speech feel.

FIX-BUG5 (TOCTOU race in stream_audio) — preserved from previous version.
"""

from __future__ import annotations

import asyncio
import re
from dataclasses import dataclass, field
from typing import AsyncGenerator

from services.tts import text_to_speech_stream, USE_ELEVENLABS, EDGE_VOICE

# ── Chunk size tuning ──────────────────────────────────────────────────────────
# These character counts correspond roughly to:
#   FIRST_FLUSH_MIN       ~2 words  (get audio playing ASAP)
#   SUBSEQUENT_FLUSH_MIN  ~3 words  (natural conversational phrase)
#   *_FLUSH_HARD          ~5–8 words (force a flush once the buffer reaches this)
#
# At average Bengali word length ~4–5 chars + space:
#   10 chars ≈ 2 words, 18 chars ≈ 3-4 words, 40 chars ≈ 7-8 words

# Flush thresholds in characters, chosen once at import time per TTS backend:
#   (first-chunk min, first-chunk hard, subsequent min, subsequent hard)
# ElevenLabs per-chunk latency is higher, so it gets the smaller set: the
# first playable audio arrives sooner and pauses feel shorter.
(
    FIRST_FLUSH_MIN,
    FIRST_FLUSH_HARD,
    SUBSEQUENT_FLUSH_MIN,
    SUBSEQUENT_FLUSH_HARD,
) = (8, 18, 14, 28) if USE_ELEVENLABS else (10, 30, 18, 40)

# Announce the active backend and the steady-state chunk range on import.
if USE_ELEVENLABS:
    _backend_label = "ElevenLabs"
else:
    _backend_label = "Edge-TTS"
print(f"[Streamer] TTS backend: {_backend_label} | chunk: {SUBSEQUENT_FLUSH_MIN}–{SUBSEQUENT_FLUSH_HARD} chars")

# Cleaned chunks shorter than this many characters are not sent to TTS.
MIN_CHARS = 2
# Characters that end a sentence (ASCII, Bengali danda / double danda, newline).
SENTENCE_BOUNDARIES = frozenset({".", "!", "?", "।", "॥", "\n"})
# Characters that end a clause inside a sentence.
CLAUSE_BOUNDARIES = frozenset({",", ";", ":", "—", "–"})
# Unique end-of-slot marker placed on an audio queue; compared by identity.
_SENTINEL = object()

_DIGIT_WORDS = {
    "0": "শূন্য",
    "1": "এক",
    "2": "দুই",
    "3": "তিন",
    "4": "চার",
    "5": "পাঁচ",
    "6": "ছয়",
    "7": "সাত",
    "8": "আট",
    "9": "নয়",
    "০": "শূন্য",
    "১": "এক",
    "২": "দুই",
    "৩": "তিন",
    "৪": "চার",
    "৫": "পাঁচ",
    "৬": "ছয়",
    "৭": "সাত",
    "৮": "আট",
    "৯": "নয়",
    "٠": "শূন্য",
    "١": "এক",
    "٢": "দুই",
    "٣": "তিন",
    "٤": "চার",
    "٥": "পাঁচ",
    "٦": "ছয়",
    "٧": "সাত",
    "٨": "আট",
    "٩": "নয়",
}


def _spoken_phone_text(text: str) -> str:
    if not text:
        return ""

    def repl(match: re.Match[str]) -> str:
        chunk = match.group(0)
        digits = [ch for ch in chunk if ch in _DIGIT_WORDS]
        if len(digits) < 10:
            return chunk
        spoken = " ".join(_DIGIT_WORDS[ch] for ch in digits)
        prev_char = text[match.start() - 1] if match.start() > 0 else ""
        next_char = text[match.end()] if match.end() < len(text) else ""
        if prev_char and not prev_char.isspace() and prev_char not in "([<{\"'":
            spoken = " " + spoken
        if next_char and not next_char.isspace() and next_char not in ")]>.,!?;:}\"'":
            spoken = spoken + " "
        return spoken

    out = re.sub(r"[+\d০-৯٠-٩][\d০-৯٠-٩\s().\-]{8,}[\d০-৯٠-٩]", repl, text)
    return re.sub(r"[ \t]{2,}", " ", out)


def _clean_for_tts(text: str) -> str:
    """
    Strip markup that should not be spoken from a text chunk before TTS.

    Steps, in order:
      1. Remove bracketed emotion/tone tags such as "[calm]" (1–24 characters
         between the brackets) when delimited by whitespace or the string
         edges, plus orphaned halves like "[neutral" / "neutral]" that appear
         when a chunk boundary falls inside a tag.
      2. Remove Markdown emphasis (*, **, ***), heading markers (#) and
         backticks.
      3. Remove line-leading bullet ("-", "•") and numbered-list ("1.", "২)")
         markers.
      4. Collapse blank lines and runs of spaces/tabs.
      5. Spell out phone-number-like digit runs via _spoken_phone_text().

    Returns the cleaned text with newlines, carriage returns and tabs trimmed
    from both ends.  Ordinary spaces at the ends are kept so chunk boundaries
    don't glue words together.
    """
    # Whole tags, then orphaned opening / closing halves.
    text = re.sub(r"(?:(?<=^)|(?<=\s))\[[^\[\]\n]{1,24}\](?=\s|$)", "", text)
    text = re.sub(r"(?:(?<=^)|(?<=\s))\[[A-Za-z]{2,16}(?=\s|$)", "", text)
    text = re.sub(r"(?:(?<=^)|(?<=\s))[A-Za-z]{2,16}\](?=\s|$)", "", text)
    # Markdown emphasis and headings.
    text = re.sub(r"\*{1,3}", "", text)
    text = re.sub(r"#+\s*", "", text)
    # List markers at the start of a line.  The (?!\d) guards keep numeric
    # content intact: without them a chunk starting with "3.5 kg" was spoken
    # as "5 kg" and "-5" lost its sign.  (\d matches any Unicode decimal
    # digit in str patterns, so Bengali digits are covered as well.)
    text = re.sub(r"^\s*[-•](?!\d)\s*", "", text, flags=re.MULTILINE)
    text = re.sub(r"^\s*[\d০-৯]+[.)](?!\d)\s*", "", text, flags=re.MULTILINE)
    text = re.sub(r"`+", "", text)
    text = re.sub(r"\n{2,}", "\n", text)
    # Collapse runs of spaces introduced by tag removal.
    text = re.sub(r"[ \t]{2,}", " ", text)
    text = _spoken_phone_text(text)
    # Keep normal spaces so chunk boundaries don't glue words together.
    return text.strip("\n\r\t")


def _flush_reason(buffer: str, first_chunk: bool) -> str | None:
    """
    Like _should_flush, but returns the reason so we can preserve spacing
    when flushing at a word boundary.
    """
    n = len(buffer)
    if n == 0:
        return None

    flush_min  = FIRST_FLUSH_MIN  if first_chunk else SUBSEQUENT_FLUSH_MIN
    hard_limit = FIRST_FLUSH_HARD if first_chunk else SUBSEQUENT_FLUSH_HARD

    if n >= hard_limit:
        return "hard"

    last_char = buffer[-1]

    if last_char in SENTENCE_BOUNDARIES and n >= flush_min:
        return "sentence"

    if last_char in CLAUSE_BOUNDARIES and n >= hard_limit * 0.70:
        return "clause"

    if last_char == " " and n >= flush_min:
        return "space"

    return None


def _should_flush(buffer: str, first_chunk: bool) -> bool:
    """
    Return True when the pending text should be flushed to TTS.

    Boolean view of _flush_reason(): delegating keeps the two functions from
    drifting apart (they previously duplicated the same threshold logic).
    A flush is due at the hard limit, at a sentence boundary or a space once
    the minimum length is reached, or at a clause boundary once the buffer
    has reached 70% of the hard limit.

    Args:
        buffer: Text accumulated since the last flush.
        first_chunk: True while no chunk has been flushed yet; selects the
            FIRST_* thresholds instead of the SUBSEQUENT_* ones.
    """
    return _flush_reason(buffer, first_chunk) is not None


@dataclass
class _AudioSlot:
    index: int
    queue: asyncio.Queue = field(default_factory=lambda: asyncio.Queue())
    done:  bool = False

    def mark_done(self)  -> None: self.done = True; self.queue.put_nowait(_SENTINEL)
    def mark_error(self) -> None: self.done = True; self.queue.put_nowait(_SENTINEL)


class ParallelTTSStreamer:
    """
    Split streamed text into small chunks and synthesise them concurrently.

    Producer side: feed text with add_token(), then call flush() once the
    text source has finished (or cancel() to abort).  Every scheduled chunk
    gets its own _AudioSlot and its own synthesis task.

    Consumer side: iterate stream_audio(), which yields audio strictly in
    slot (scheduling) order even though the synthesis tasks overlap.

    reset() returns the instance to its initial state for reuse.
    """

    def __init__(self, voice: str | None = None) -> None:
        """
        Args:
            voice: Passed through as ``voice=`` to text_to_speech_stream().
        """
        self.voice        = voice
        self.buffer       = ""       # text accumulated since the last flush
        self._cancelled   = False
        self._first_chunk = True     # selects the FIRST_* thresholds
        self._carry_space = False    # next buffer should start with " "
        self._slot_index  = 0
        self._slots: list[_AudioSlot] = []
        self._slots_lock  = asyncio.Lock()
        self._tasks: list[asyncio.Task] = []
        self._llm_done    = asyncio.Event()   # set by flush() / cancel()
        self._slot_added  = asyncio.Event()   # wakes stream_audio()
        # Reference time for the time-based safety valve in add_token().
        # None until the first token arrives, so the 0.8 s window is measured
        # from the start of the utterance rather than from loop time 0.
        self._last_flush_t: float | None = None
        self._last_token_t: float = 0.0

    async def add_token(self, token: str) -> None:
        """
        Append a token to the buffer and schedule a chunk when one is due.

        A chunk is scheduled when _flush_reason() reports a boundary or the
        hard limit.  Otherwise, as a safety valve, a chunk is forced once the
        buffer holds at least the minimum length and 0.8 s have passed since
        the previous flush (or since the first token of the utterance).

        Empty tokens, and any token after cancel(), are ignored.
        """
        if not token or self._cancelled:
            return
        now = asyncio.get_running_loop().time()
        self._last_token_t = now
        if self._last_flush_t is None:
            # First token of this utterance: start the safety-valve clock
            # here.  Starting it at 0.0 made the valve fire immediately and
            # cut the very first chunk mid-word at FIRST_FLUSH_MIN.
            self._last_flush_t = now
        # After a word-boundary flush the buffer already holds the carried
        # inter-word space; drop the token's own leading whitespace so the
        # space is not doubled.
        if self.buffer == " " and token[:1].isspace():
            token = token.lstrip()
        self.buffer += token

        reason = _flush_reason(self.buffer, self._first_chunk)
        if reason is not None:
            self._first_chunk = False
            self._carry_space = (reason == "space")
            await self._schedule_chunk()
            self._last_flush_t = now
            return

        # Safety valve: if tokens arrive without good boundaries, we can go a
        # long time without scheduling any TTS slots → streamer timeout/no audio.
        # Force a flush after a short delay once we have enough text.
        flush_min = FIRST_FLUSH_MIN if self._first_chunk else SUBSEQUENT_FLUSH_MIN
        if len(self.buffer) >= flush_min and (now - self._last_flush_t) >= 0.8:
            self._first_chunk = False
            # Time-based flush: don't force a carry space.
            self._carry_space = False
            await self._schedule_chunk()
            self._last_flush_t = now

    async def _schedule_chunk(self) -> None:
        """
        Turn the current buffer into a slot plus a background synthesis task.

        The buffer is reset to " " when the flush happened at a word boundary
        (so the inter-word space is not lost) and to "" otherwise.  Nothing is
        scheduled when the cleaned text is shorter than MIN_CHARS or the
        streamer has been cancelled.
        """
        if self._cancelled:
            self.buffer = ""
            return
        raw = self.buffer
        self.buffer = " " if self._carry_space else ""
        self._carry_space = False
        text = _clean_for_tts(raw)
        if len(text) < MIN_CHARS:
            return
        async with self._slots_lock:
            slot = _AudioSlot(index=self._slot_index)
            self._slot_index += 1
            self._slots.append(slot)
            self._slot_added.set()
        task = asyncio.create_task(self._synthesise(text, slot))
        self._tasks.append(task)
        # Forget finished tasks; cancel() may already have emptied the list.
        task.add_done_callback(
            lambda t: self._tasks.remove(t) if t in self._tasks else None
        )

    async def _synthesise(self, text: str, slot: _AudioSlot) -> None:
        """
        Stream synthesised audio for *text* into *slot*, then close the slot.

        Synthesis errors are logged and end the slot early; cancellation ends
        it quietly.  Once synthesis has started the slot is always closed, so
        the consumer never blocks on it.
        """
        if self._cancelled:
            slot.mark_error()
            return
        try:
            async for chunk in text_to_speech_stream(text, voice=self.voice):
                if self._cancelled:
                    break
                await slot.queue.put(chunk)
        except asyncio.CancelledError:
            pass
        except Exception as exc:
            print(f"[Streamer] TTS error for '{text[:50]}': {exc}")
        finally:
            slot.mark_done()

    async def flush(self) -> None:
        """
        Schedule any remaining buffered text and signal end of input.

        Also wakes a consumer parked in stream_audio().  When no final chunk
        is scheduled (empty or whitespace-only buffer) nothing else would set
        ``_slot_added``, and the consumer would only notice completion after
        its 30 s wait timed out.
        """
        if self.buffer.strip():
            await self._schedule_chunk()
        self._llm_done.set()
        self._slot_added.set()

    async def cancel(self) -> None:
        """
        Abort: cancel all synthesis tasks, close every open slot and wake the
        consumer so stream_audio() can finish.
        """
        self._cancelled = True
        tasks = list(self._tasks)
        self._tasks.clear()
        for t in tasks:
            t.cancel()
        if tasks:
            await asyncio.gather(*tasks, return_exceptions=True)
        # Tasks cancelled before they started never reach their ``finally``;
        # close those slots here.
        async with self._slots_lock:
            for slot in self._slots:
                if not slot.done:
                    slot.mark_error()
        self._llm_done.set()
        self._slot_added.set()

    async def stream_audio(self) -> AsyncGenerator[bytes, None]:
        """
        Deliver TTS audio chunks in slot order.

        Ends once end-of-input has been signalled (flush() or cancel()) and
        every slot has been drained.  After cancel(), remaining items are
        drained but not yielded.

        FIX-BUG5 — double-check pattern eliminates TOCTOU race:
          1. clear() the event
          2. Re-check slot list under lock (slot may have been added between
             previous check and clear()), and re-check end-of-input
          3. Only then wait() — so we never miss a newly-added slot
        """
        delivered = 0
        while True:
            async with self._slots_lock:
                slot = self._slots[delivered] if delivered < len(self._slots) else None

            if slot is None:
                if self._llm_done.is_set():
                    async with self._slots_lock:
                        total = len(self._slots)
                    if delivered >= total:
                        break  # All slots consumed; done.

                # FIX-BUG5: clear → re-check → wait
                self._slot_added.clear()
                async with self._slots_lock:
                    have_new = delivered < len(self._slots)
                if have_new or self._llm_done.is_set():
                    # Either a slot is ready, or input ended while we were
                    # checking; the top of the loop handles both.
                    continue
                try:
                    await asyncio.wait_for(self._slot_added.wait(), timeout=30.0)
                except asyncio.TimeoutError:
                    # Don't abort the whole stream; LLM/TTS backends can stall.
                    # Keep waiting unless the LLM already finished.
                    if self._llm_done.is_set():
                        break
                    print("[Streamer] Timeout waiting for TTS slot (continuing)…")
                    continue
                continue

            # Drain this slot's audio queue in order
            while True:
                item = await slot.queue.get()
                if item is _SENTINEL:
                    break
                if not self._cancelled:
                    yield item
            delivered += 1

    def reset(self) -> None:
        """
        Return to the initial state so the instance can be reused.

        Clears the buffer, slots, task list, events and the flush/token
        timestamps.  Tasks still in the list are dropped, not cancelled.
        """
        self._cancelled   = False
        self._first_chunk = True
        self._carry_space = False
        self.buffer       = ""
        self._slot_index  = 0
        self._slots.clear()
        self._tasks.clear()
        self._llm_done.clear()
        self._slot_added.clear()
        # Restart the safety-valve clock for the next utterance; a stale
        # timestamp would make the time-based flush fire immediately.
        self._last_flush_t = None
        self._last_token_t = 0.0