"""
Step 4: Translate segment texts using Pollinations chat completions API
(OpenAI-compatible endpoint, no extra API key needed beyond POLLEN_API_KEY).
"""
import re

from .lang._shared import build_client, bedrock_fallback, parse_json_array, MODEL, log_llm_call
from .lang import get_translation_prompt, get_fallback_mode, post_translate


# Matches a leading list marker such as "3. ", "3) " or "3- ". The negative
# lookahead keeps real numeric content ("3.5 million", "2-3 days") intact.
_LEADING_ITEM_NUMBER = re.compile(r'^(\d+)[.)\-](?!\d)\s*')


def _strip_item_number(text: str, position: int) -> str:
    """Remove an echoed list number from the start of a translated line.

    The prefix is removed only when its number equals ``position`` (the
    1-based index the line was sent under) so that translations which
    genuinely begin with a number are left untouched.
    """
    match = _LEADING_ITEM_NUMBER.match(text)
    if match and int(match.group(1)) == position:
        return text[match.end():]
    return text


def _default_translation_prompt(target_language: str) -> str:
    """Build the generic dubbing-writer system prompt for target_language."""
    return (
        f"You are a voice-over dubbing writer — not a translator. "
        f"Your job is to write what a native {target_language} speaker would *actually say out loud* "
        f"in a casual, natural conversation. Forget the source words. Capture the meaning, tone, and energy.\n\n"

        f"INPUT FORMAT:\n"
        f"Numbered lines with a spoken duration in brackets, e.g.: 1. [4.6s] Hello there\n\n"

        f"OUTPUT FORMAT:\n"
        f"A JSON array of {target_language} strings — one per input line, in order. "
        f"No numbering, no brackets, no extra text.\n"
        f'Shape: ["<first line translated into {target_language}>", "<second line translated into {target_language}>"]\n\n'

        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        f"SCORING RUBRIC — evaluate every line against these before outputting:\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        f"[1] NATURALNESS — weight: HIGH\n"
        f"  Would a native speaker actually say this in real life?\n"
        f"  ✗ Fail: dictionary phrasing, formal register, textbook grammar\n"
        f"  ✓ Pass: contractions, colloquial rhythm, everyday vocabulary\n"
        f"  Ask yourself: 'Would I hear this in a TV show or on the street?' If no → rewrite.\n\n"

        f"[2] SPOKEN FIT — weight: CRITICAL\n"
        f"  The line will be read by TTS within the duration shown in brackets.\n"
        f"  Fewer words is almost always safer. Aim for 70–80% of the original word count.\n"
        f"  ✗ Fail: translation is longer or same length as the English\n"
        f"  ✓ Pass: shorter, with no loss of core meaning or emotional tone\n"
        f"  Trick: cut filler, merge ideas, use contractions and short-form spoken words.\n\n"

        f"[3] TTS READABILITY — weight: HIGH\n"
        f"  Long sentences with multiple commas trip up TTS engines.\n"
        f"  ✗ Fail: 'She met him, her true love, on a rainy evening, in the city she once fled.'\n"
        f"  ✓ Pass: 'She met him on a rainy evening. Her true love. In the city she once fled.'\n"
        f"  Short beats. Natural pauses. Each sentence punches clean.\n\n"

        f"[4] EMOTIONAL REGISTER — weight: HIGH\n"
        f"  Match the tone of the original: casual, urgent, tender, funny, sarcastic — whatever it is.\n"
        f"  ✗ Fail: a sarcastic line becomes polite; a tender moment becomes clinical\n"
        f"  ✓ Pass: the emotional texture is preserved even if the words are completely different\n\n"

        f"[5] TRANSLATION PURITY — weight: MEDIUM\n"
        f"  Every word in the output must be {target_language}. No words from the original "
        f"language should leak through.\n"
        f"  This includes: filler words (Oh, Hmm, Well, So, Right when not native to "
        f"{target_language}), names used as exclamations, brand-style interjections. "
        f"Find the {target_language} equivalent every time.\n\n"

        f"[6] WORD-FOR-WORD TRAP — weight: HIGH (avoid this)\n"
        f"  Do NOT translate word by word. No one speaks that way.\n"
        f"  ✗ Fail: a literal one-to-one rendering that preserves the source word order\n"
        f"  ✓ Pass: a restructured line that reads naturally in {target_language} "
        f"while keeping the same meaning\n"
        f"  Restructure freely. {target_language} has its own natural word order — use it.\n\n"

        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n"
        f"BEFORE RETURNING OUTPUT:\n"
        f"For each line, silently run this checklist:\n"
        f"  □ Would a native speaker say this naturally out loud?\n"
        f"  □ Is it shorter than the English original?\n"
        f"  □ Are there any commas that create awkward TTS pauses? → break into short sentences\n"
        f"  □ Does the emotional tone match?\n"
        f"  □ Are there any English words hiding in the output?\n"
        f"If any box fails → rewrite that line. Then output.\n"
        f"━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\n\n"

        f"Return ONLY the JSON array. No preamble, no explanation, no duration prefixes."
    )


def _google_fallback(segments: list[dict], target_language: str) -> list[dict]:
    """Translate each segment's text with deep_translator's GoogleTranslator."""
    # Imported lazily so the dependency is only needed when the fallback runs.
    from deep_translator import GoogleTranslator
    try:
        translator = GoogleTranslator(source='auto', target=target_language.lower())
    except Exception as init_error:
        print(f"[s3] Fallback failed to init translator ({init_error})")
        raise

    result = [
        {**seg, "translated_text": translator.translate(seg["text"])}
        for seg in segments
    ]

    print("[s3] Translation via fallback complete ✓")
    return result


def _translate_batch(segments: list[dict], target_language: str) -> list[dict]:
    """Translate a batch of segments into target_language.

    The batch is sent to the chat-completions client as one numbered list
    (each line carrying its spoken duration) and the reply is parsed as a
    JSON array. If the reply does not contain exactly one item per segment
    the request is retried; if the primary path raises for any reason, the
    language's configured fallback (Bedrock or Google Translate) is used.

    Args:
        segments: Dicts with at least 'start', 'end' and 'text' keys.
        target_language: Language name, e.g. "Spanish".

    Returns:
        One dict per input segment: a copy of the segment with
        'translated_text' added (on the primary and Google fallback paths;
        the Bedrock path returns whatever ``bedrock_fallback`` produces).
        An empty ``segments`` is returned unchanged.

    Raises:
        Exception: Errors from ``build_client`` propagate unchanged, as do
            errors raised by the fallback translators themselves.
    """
    if not segments:
        return segments

    # Build single-shot batch: include duration so the LLM can match spoken length
    numbered = "\n".join(
        f"{i+1}. [{s['end'] - s['start']:.1f}s] {s['text']}"
        for i, s in enumerate(segments)
    )

    # Let language-specific handler override the generic prompt if needed
    system_prompt = get_translation_prompt(
        target_language, _default_translation_prompt(target_language)
    )

    expected = len(segments)
    strict_prompt = (
        system_prompt
        + f"\n\nCRITICAL: You MUST return exactly {expected} items in the JSON array "
        f"— one per input line. Do NOT merge, skip, or split any lines."
    )

    client = build_client()
    max_retries = 2
    temperature = 0.2
    try:
        for attempt in range(1, max_retries + 1):
            response = client.chat.completions.create(
                model=MODEL,
                messages=[
                    {"role": "system", "content": strict_prompt},
                    {"role": "user", "content": numbered},
                ],
                temperature=temperature,
            )

            raw = response.choices[0].message.content.strip()
            log_llm_call(
                step="s3_translate", provider="pollinations", model=MODEL,
                system_prompt=strict_prompt, user_prompt=numbered,
                response=raw, temperature=temperature,
            )
            translated_list = parse_json_array(raw)

            if len(translated_list) == expected:
                break

            print(f"[s3] Pollinations returned {len(translated_list)}/{expected} items (attempt {attempt}/{max_retries})")
        else:
            # Every attempt returned the wrong number of items.
            raise ValueError(
                f"Translation returned {len(translated_list)} items but expected {expected} after {max_retries} attempts"
            )

        # The model sometimes echoes the input numbering; drop it only when it
        # matches the line's own position so numeric content is never eaten.
        result = [
            {**seg, "translated_text": _strip_item_number(text, position)}
            for position, (seg, text) in enumerate(
                zip(segments, translated_list), start=1
            )
        ]

        print("[s3] Translating via Pollinations complete ✓")
        return result

    except Exception as e:
        # Deliberate best-effort boundary: any primary-path failure falls back.
        print(f"[s3] Pollinations translation error ({e}) — using fallback.")

        # Language-specific fallback routing
        if get_fallback_mode(target_language) == "bedrock":
            return bedrock_fallback(segments, numbered, system_prompt)

        # Default: Google Translate
        return _google_fallback(segments, target_language)


def translate(
    segments: list[dict],
    target_language: str,
    *,
    batch_size: int = 15,
) -> list[dict]:
    """
    Translate the text of each segment into target_language in batches.

    Segments are sliced into consecutive batches of at most ``batch_size``
    items, each batch is translated by ``_translate_batch``, and the combined
    result is passed through ``post_translate`` for language-specific
    post-processing.

    Args:
        segments: List of {start, end, text} dicts.
        target_language: Full language name, e.g. "Spanish", "French", "Hindi".
        batch_size: Maximum number of segments sent per translation request.
            Must be at least 1. Defaults to 15.

    Returns:
        The value returned by ``post_translate`` for the translated segments
        (each carrying 'translated_text'; language-specific fields such as
        'tts_text' may also be added). An empty ``segments`` is returned
        unchanged.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    # A zero step makes range() fail and a negative one yields no batches at
    # all, which would silently drop every segment.
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")

    if not segments:
        return segments

    total = len(segments)
    print(f"[s3] Translating {total} segments → {target_language} (in batches)...")

    # Per-batch progress is only worth logging when there is more than one batch.
    announce_batches = total > batch_size
    final_result = []

    for batch_number, offset in enumerate(range(0, total, batch_size), start=1):
        batch = segments[offset:offset + batch_size]
        if announce_batches:
            print(f"[s3] Processing batch {batch_number} ({len(batch)} items)...")
        final_result.extend(_translate_batch(batch, target_language))

    # Run language-specific post-processing (e.g., Urdu transliteration)
    return post_translate(final_result, target_language)