"""
Step 6b: Generate ASS subtitle file from translated segments.

Produces OpusClip-style karaoke captions: max 4 words per line,
with word-by-word highlight using ASS \\kf tags.
"""
from pathlib import Path


# Language names that generate_captions() lays out right-to-left when one of
# them is passed as ``target_language``.
_RTL_LANGUAGES = {"Arabic", "Hebrew", "Urdu", "Farsi", "Persian"}


def _format_ass_time(seconds: float) -> str:
    """Convert seconds to ASS timestamp format: H:MM:SS.cc"""
    h = int(seconds // 3600)
    m = int((seconds % 3600) // 60)
    s = int(seconds % 60)
    cs = int(round((seconds - int(seconds)) * 100))
    return f"{h}:{m:02d}:{s:02d}.{cs:02d}"


def _build_karaoke_chunks(
    text: str,
    seg_start: float,
    seg_end: float,
    max_words: int = 4,
) -> list[dict]:
    """Split text into timed word chunks for karaoke display.

    Distributes the segment duration across words proportionally
    to character count, then groups into chunks of max_words.

    Returns list of {"words": [(word, duration_cs), ...], "start": float, "end": float}.
    """
    raw_words = text.split()
    if not raw_words:
        return []

    total_duration = max(seg_end - seg_start, 0.1)
    total_chars = sum(max(len(w), 1) for w in raw_words)

    # Character-weighted durations
    word_durations = []
    for w in raw_words:
        frac = max(len(w), 1) / total_chars
        dur = total_duration * frac
        word_durations.append(max(dur, 0.05))

    # Normalize so they sum to total_duration exactly
    dur_sum = sum(word_durations)
    word_durations = [d * total_duration / dur_sum for d in word_durations]

    # Build absolute timestamps per word
    timestamps = []
    t = seg_start
    for dur in word_durations:
        timestamps.append((t, t + dur))
        t += dur

    # Group into chunks
    chunks = []
    for i in range(0, len(raw_words), max_words):
        chunk_words = raw_words[i:i + max_words]
        chunk_durs = word_durations[i:i + max_words]
        chunk_start = timestamps[i][0]
        chunk_end = timestamps[min(i + max_words, len(raw_words)) - 1][1]

        words_with_timing = []
        for w, dur in zip(chunk_words, chunk_durs):
            cs = max(round(dur * 100), 1)  # centiseconds, minimum 1
            words_with_timing.append((w, cs))

        chunks.append({
            "words": words_with_timing,
            "start": chunk_start,
            "end": chunk_end,
        })

    return chunks


def _format_karaoke_line(
    chunk: dict,
    style_name: str = "Karaoke",
    is_rtl: bool = False,
    highlight_color: str = "00FFFF",
) -> str:
    """Format a karaoke chunk as an ASS Dialogue line.

    LTR chunks emit one ``\\kf`` tag per word, in spoken order.

    RTL chunks emit the words in reversed order so they read right-to-left,
    and replace ``\\kf`` with per-word ``\\t()`` colour transitions so the
    highlight timing follows spoken order instead of source order. With plain
    ``\\kf`` the highlight would fill in source order, i.e. left-to-right in
    the reversed layout, which is the wrong direction for RTL speech.

    Args:
        chunk: Dict with ``"start"`` and ``"end"`` times in seconds and
            ``"words"``, a list of ``(word, duration_cs)`` pairs in spoken
            order.
        style_name: ASS style name written into the Dialogue line.
        is_rtl: Select the reversed, ``\\t()``-based layout.
        highlight_color: Hex colour inserted into the ``\\1c`` tag that the
            RTL transition animates towards; unused for LTR chunks.

    Returns:
        A single ``Dialogue: ...`` line without a trailing newline.
    """
    start = _format_ass_time(chunk["start"])
    end = _format_ass_time(chunk["end"])

    # A literal brace inside a word would open or close an override block and
    # corrupt the tags emitted below, so escape braces the same way the plain
    # (non-karaoke) caption path in generate_captions() does.
    words = [
        (word.replace("{", "\\{").replace("}", "\\}"), cs)
        for word, cs in chunk["words"]
    ]

    if is_rtl:
        # Walk the words in spoken order, giving each its highlight slot in
        # milliseconds relative to line start (centiseconds * 10).
        #
        # No per-word RLE/PDF wraps: each override block already splits libass
        # into a separate BiDi run, so wraps are redundant and caused layout
        # drift during \t() color animation. Urdu/Arabic chars are strongly
        # RTL by Unicode property and shape correctly within each word without
        # explicit marks.
        parts = []
        elapsed_ms = 0
        for word, cs in words:
            slot_end = elapsed_ms + cs * 10
            parts.append(
                f"{{\\1c&HFFFFFF&\\t({elapsed_ms},{slot_end},\\1c&H{highlight_color}&)}}"
                f"{word}"
            )
            elapsed_ms = slot_end
        # Emit in REVERSED visual order; each part carries its own \t(), so
        # the timing stays tied to spoken order.
        parts.reverse()
    else:
        parts = [f"{{\\kf{cs}}}{word}" for word, cs in words]

    karaoke_text = " ".join(parts)
    return f"Dialogue: 0,{start},{end},{style_name},,0,0,0,,{karaoke_text}"


def generate_captions(
    segments: list[dict],
    output_path: str = "tmp/captions.ass",
    max_words_per_line: int = 4,
    highlight_color: str = "00FFFF",
    target_language: str = "",
) -> str:
    """
    Generate an ASS subtitle file with karaoke-style word highlights.

    Each segment's text is taken from ``caption_text``, falling back to
    ``translated_text`` and then ``text`` when the earlier key is absent.
    Segments whose text is empty or whitespace-only are skipped. A segment
    with a non-empty ``words`` entry is rendered as karaoke chunks; the entry
    is only used as a switch, word timings are derived from the segment's
    ``start``/``end``. Any other segment is rendered as one plain caption in
    the ``Default`` style.

    Args:
        segments: List of dicts with {start, end, translated_text, words?}.
        output_path: Where to write the .ass file. Missing parent
            directories are created.
        max_words_per_line: Max words per caption chunk (default 4).
        highlight_color: BGR hex color for karaoke fill (default yellow).
        target_language: Target language name for RTL detection. Matching
            ignores case and surrounding whitespace.

    Returns:
        Path to the generated ASS file.

    Raises:
        KeyError: If a rendered segment lacks ``start`` or ``end``.
    """
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    # Compare case-insensitively so "arabic" or " Urdu " still select the RTL
    # layout; `or ""` keeps a None language behaving as "not RTL".
    language_key = (target_language or "").strip().casefold()
    is_rtl = language_key in {name.casefold() for name in _RTL_LANGUAGES}
    wrap_style = 2 if is_rtl else 0
    # Tahoma has reliable Arabic/Urdu shaping across macOS/Windows/Linux ffmpeg
    # builds; Arial often lacks the glyph coverage on headless Linux.
    font = "Tahoma" if is_rtl else "Noto Sans"
    # Encoding 178 = Windows Arabic codepage — hints libass font selection.
    encoding = 178 if is_rtl else 0

    # NOTE(review): with \kf, ASS draws not-yet-sung text in SecondaryColour
    # and sweeps to PrimaryColour, so LTR karaoke lines appear to start in
    # highlight_color and fill to white, whereas the RTL path animates white
    # to highlight_color. Confirm which direction is intended.
    header = f"""\
[Script Info]
Title: VideoVoice Captions
ScriptType: v4.00+
PlayResX: 1920
PlayResY: 1080
WrapStyle: {wrap_style}

[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,{font},52,&H00FFFFFF,&H000000FF,&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,1,2,40,40,50,{encoding}
Style: Karaoke,{font},58,&H00FFFFFF,&H00{highlight_color},&H00000000,&H80000000,-1,0,0,0,100,100,0,0,1,3,0,2,40,40,60,{encoding}

[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""

    lines = [header]
    for seg in segments:
        text = seg.get("caption_text", seg.get("translated_text", seg.get("text", "")))
        if not text or not text.strip():
            continue

        if seg.get("words"):
            chunks = _build_karaoke_chunks(text, seg["start"], seg["end"], max_words_per_line)
            lines.extend(
                _format_karaoke_line(chunk, is_rtl=is_rtl, highlight_color=highlight_color)
                for chunk in chunks
            )
            continue

        start = _format_ass_time(seg["start"])
        end = _format_ass_time(seg["end"])
        safe_text = text.replace("\\", "\\\\").replace("{", "\\{").replace("}", "\\}")
        # A raw newline would end the Dialogue event and lose the rest of the
        # caption, so turn line breaks into the ASS hard break (\N). Outer
        # whitespace is stripped first so a trailing newline does not add an
        # empty line.
        safe_text = "\\N".join(safe_text.strip().splitlines())
        if is_rtl:
            # Wrap in RLE ... PDF so the whole caption is laid out as an RTL
            # embedding.
            safe_text = f"\u202B{safe_text}\u202C"
        lines.append(f"Dialogue: 0,{start},{end},Default,,0,0,0,,{safe_text}")

    with open(output_path, "w", encoding="utf-8") as f:
        f.write("\n".join(lines) + "\n")

    print(f"[s6b] Captions generated → {output_path} ✓")
    return output_path