from __future__ import annotations

import json
import os
import tempfile
from pathlib import Path

import gradio as gr
import requests

# Filesystem locations, all resolved relative to the directory holding this file.
APP_DIR = Path(__file__).resolve().parent
PROMPTS_FILE = APP_DIR / "code_switch_prompts.json"
VOICE_DIR = APP_DIR / "assets" / "voices"
# Backend endpoint and optional API key are read from the environment. Trailing
# slashes are stripped from the URL because request paths are appended with "/".
API_URL = os.getenv("INDICVOX_API_URL", "").rstrip("/")
BACKEND_TOKEN = os.getenv("INDICVOX_BACKEND_TOKEN", "")
# Initial dropdown selections and prompt text shown when the UI opens.
DEFAULT_PROFILE = "Tamil Focus"
DEFAULT_VOICE = "Tamil Female Research Voice"
DEFAULT_TEXT = "இந்த experimentக்கு clean reference audio use பண்ணணும், இல்லனா output quality drop ஆகும்."
# Timeout, in seconds, passed to the synthesis request.
TIMEOUT_S = 600
# Single requests session shared by every backend call made from this module.
SESSION = requests.Session()

# Model profiles offered in the "Model Profile" dropdown. The key is both the
# dropdown choice and the `profile_name` value sent to the backend; the
# description is rendered below the controls by `profile_markdown`.
PROFILES = {
    "Tamil Focus": {
        "description": "Best for Tamil and Tamil-English code-switched prompts.",
    },
    "Hindi Focus": {
        "description": "Best for Hindi and Hindi-English code-switched prompts.",
    },
    "Research Baseline": {
        "description": "Base multilingual checkpoint without paper fine-tuning.",
    },
}

# Voice presets offered in the "Voice Preset" dropdown. The key is both the
# dropdown choice and the `voice_name` value sent to the backend (only the name
# is sent; the clip itself is used locally for the preview player).
#   path       - local reference clip for the preview, or None when there is none
#   transcript - text of the reference clip, or None when there is no clip
#   summary    - one-line description rendered by `voice_markdown`
VOICE_PRESETS = {
    "Hindi Research Voice": {
        "path": VOICE_DIR / "hin_m_ref_00.wav",
        "transcript": "लेकिन क्या यह हम सभी कार्यक्रमों के साथ कर सकते?",
        "summary": "Short Hindi reference used for sharper Hindi + English prompting.",
    },
    "Tamil Female Research Voice": {
        "path": VOICE_DIR / "tam_f_ref_00.wav",
        "transcript": "விக்கற நேரத்தையும் லாபத்தையும் பொறுத்து, இந்த டேக்ஸை ஷார்ட் டேர்ம் இல்ல லாங் டேர்ம்னு பிரிப்பாங்க.",
        "summary": "Clear Tamil reference with stable conversational prosody.",
    },
    "Tamil Male Research Voice": {
        "path": VOICE_DIR / "tam_m_ref_00.wav",
        "transcript": "கொரோனா பாதிப்பு காலத்தில் எண்பது கோடி மக்களுக்கு  உணவு தானியம் வழங்கப்பட்டதாகவும் அவர் தெரிவித்தார்.",
        "summary": "Tamil male reference that holds rhythm well on longer prompts.",
    },
    "Text Only": {
        "path": None,
        "transcript": None,
        "summary": "Zero-shot generation without a reference voice clip.",
    },
}

# Stylesheet handed to `demo.launch()`. The selectors match the `app-shell`
# elem_id of the outer column, the `#hero` div and `stat-chip` spans in the
# header HTML, and the `footnote` class on the explanatory Markdown blocks.
# The final rule hides the page's <footer> element.
CUSTOM_CSS = """
#app-shell {
    max-width: 1180px;
    margin: 0 auto;
}
#hero {
    padding: 24px 26px 12px 26px;
    border: 1px solid rgba(255, 255, 255, 0.08);
    border-radius: 22px;
    background:
        radial-gradient(circle at top right, rgba(99, 102, 241, 0.16), transparent 34%),
        radial-gradient(circle at bottom left, rgba(16, 185, 129, 0.14), transparent 30%),
        rgba(15, 23, 42, 0.74);
}
.stat-chip {
    display: inline-block;
    margin: 6px 8px 0 0;
    padding: 8px 12px;
    border-radius: 999px;
    background: rgba(255, 255, 255, 0.06);
    font-size: 0.92rem;
}
.footnote {
    opacity: 0.78;
    font-size: 0.94rem;
}
footer {
    visibility: hidden;
}
"""

# Gradio theme handed to `demo.launch()` together with CUSTOM_CSS.
THEME = gr.themes.Soft(primary_hue="indigo", secondary_hue="emerald")


# One entry per example row: (prompt-bank key, index within that bank,
# model profile, voice preset).
_EXAMPLE_SELECTION = (
    ("hi_en", 0, "Hindi Focus", "Hindi Research Voice"),
    ("hi_en", 9, "Hindi Focus", "Hindi Research Voice"),
    ("hi_en", 16, "Hindi Focus", "Hindi Research Voice"),
    ("ta_en", 0, "Tamil Focus", "Tamil Female Research Voice"),
    ("ta_en", 9, "Tamil Focus", "Tamil Female Research Voice"),
    ("ta_en", 14, "Tamil Focus", "Tamil Male Research Voice"),
)


def load_examples(prompts_file: Path | str | None = None) -> list[list[str]]:
    """Build the example rows shown in the example tabs.

    Args:
        prompts_file: JSON prompt bank to read. Defaults to the module-level
            ``PROMPTS_FILE`` when omitted, which keeps existing zero-argument
            callers working unchanged.

    Returns:
        One ``[prompt text, profile name, voice name]`` row per entry of
        ``_EXAMPLE_SELECTION``, in that order.

    Raises:
        OSError: The prompt file cannot be opened.
        json.JSONDecodeError: The file is not valid JSON.
        KeyError, IndexError: The bank lacks a selected key, index or ``text`` field.
    """
    source = Path(PROMPTS_FILE if prompts_file is None else prompts_file)
    with source.open("r", encoding="utf-8") as f:
        prompt_bank = json.load(f)

    return [
        [prompt_bank[bank][index]["text"], profile_name, voice_name]
        for bank, index, profile_name, voice_name in _EXAMPLE_SELECTION
    ]


# Example rows are read once at import time, so an unreadable or malformed
# prompt file raises here, before the UI is built.
EXAMPLES = load_examples()


def profile_markdown(profile_name: str) -> str:
    """Render a profile's name in bold followed by its description on a new line.

    Raises:
        KeyError: ``profile_name`` is not a key of ``PROFILES``.
    """
    description = PROFILES[profile_name]["description"]
    # Two trailing spaces before the newline force a Markdown line break.
    return "  \n".join((f"**{profile_name}**", f"{description}"))


def voice_markdown(voice_name: str) -> str:
    """Render a voice preset as Markdown: bold name, summary, optional transcript.

    The reference transcript line is included only for presets that have a
    reference clip (``path`` is not None).

    Raises:
        KeyError: ``voice_name`` is not a key of ``VOICE_PRESETS``.
    """
    preset = VOICE_PRESETS[voice_name]
    parts = [f"**{voice_name}**", f"{preset['summary']}"]
    if preset["path"] is not None:
        parts.append(f"Reference transcript: `{preset['transcript']}`")
    # Two trailing spaces before each newline force Markdown line breaks.
    return "  \n".join(parts)


def auth_headers() -> dict[str, str]:
    """Return the request headers for backend calls.

    Contains an ``x-api-key`` header when ``BACKEND_TOKEN`` is non-empty;
    otherwise the mapping is empty. A new dict is returned on every call.
    """
    return {"x-api-key": BACKEND_TOKEN} if BACKEND_TOKEN else {}


def backend_status() -> str:
    """Return a Markdown summary of the backend's health for the status panel.

    Issues ``GET {API_URL}/health`` with a 10 second timeout. This function is
    meant to never raise: a missing URL, network failures, non-2xx replies and
    undecodable bodies are all reported as Markdown text instead.

    Returns:
        Markdown text describing one of three states: not configured,
        unreachable (with the exception type and message), or ready (with the
        GPU, warm profile and uptime fields reported by the backend).
    """
    if not API_URL:
        return "**Backend Not Configured**  \nSet `INDICVOX_API_URL` in Space secrets."

    try:
        response = SESSION.get(f"{API_URL}/health", headers=auth_headers(), timeout=10)
        response.raise_for_status()
        payload = response.json()
    except Exception as exc:  # deliberate catch-all: this is a best-effort status probe
        return (
            f"**Backend Unreachable**  \n"
            f"Endpoint: `{API_URL}`  \n"
            f"Error: `{type(exc).__name__}: {exc}`"
        )

    # A 2xx reply whose JSON is not an object (e.g. `"ok"` or `[]`) has no
    # `.get`; treat it as a healthy backend that reported no details rather
    # than letting an AttributeError escape into the UI event handler.
    if not isinstance(payload, dict):
        payload = {}

    return (
        f"**VM Backend Ready**  \n"
        f"Endpoint: `{API_URL}`  \n"
        f"GPU: `{payload.get('gpu', 'unknown')}`  \n"
        f"Warm profile: `{payload.get('active_profile', 'unknown')}`  \n"
        f"Uptime: `{payload.get('uptime_s', 'unknown')}s`"
    )


def synthesize(text: str, profile_name: str, voice_name: str, cfg_value: float, inference_steps: int):
    """Request speech from the backend and return it with a status summary.

    Args:
        text: Prompt to synthesize; surrounding whitespace is ignored.
        profile_name: Model profile name forwarded to the backend.
        voice_name: Voice preset name forwarded to the backend.
        cfg_value: CFG slider value, sent as a float.
        inference_steps: Diffusion step count, sent as an int.

    Returns:
        ``(audio_path, status_markdown)`` where ``audio_path`` is a temporary
        ``.wav`` file holding the response body and ``status_markdown`` lists
        the profile, voice and the timing headers reported by the backend.

    Raises:
        gr.Error: The prompt is empty, the backend URL is not configured, the
            request could not be completed, the backend answered with an
            error status, or the response body was empty.
    """
    # `text` can be None when the endpoint is called through the API rather
    # than the textbox, so guard before stripping.
    clean_text = (text or "").strip()
    if not clean_text:
        raise gr.Error("Enter a prompt first.")
    if not API_URL:
        raise gr.Error("`INDICVOX_API_URL` is not configured on the Space.")

    try:
        response = SESSION.post(
            f"{API_URL}/synthesize",
            headers=auth_headers(),
            json={
                "text": clean_text,
                "profile_name": profile_name,
                "voice_name": voice_name,
                "cfg_value": float(cfg_value),
                "inference_steps": int(inference_steps),
            },
            timeout=TIMEOUT_S,
        )
    except requests.RequestException as exc:
        # Timeouts and connection failures would otherwise surface as an
        # opaque traceback; report them the same way as backend errors.
        raise gr.Error(f"Backend request failed: {type(exc).__name__}: {exc}") from exc

    if not response.ok:
        # Prefer the structured `detail` field when the error body is a JSON
        # object; otherwise fall back to the raw response text.
        detail = response.text
        try:
            error_payload = response.json()
        except ValueError:
            error_payload = None
        if isinstance(error_payload, dict):
            detail = error_payload.get("detail", detail)
        raise gr.Error(f"Backend error {response.status_code}: {detail}")

    if not response.content:
        raise gr.Error("Backend returned an empty audio response.")

    # delete=False keeps the file on disk after the `with` block so the path
    # can be handed to the audio component. This function does not remove it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        f.write(response.content)
        audio_path = f.name

    audio_seconds = response.headers.get("X-IndicVox-Audio-Seconds", "n/a")
    generation_seconds = response.headers.get("X-IndicVox-Generation-Seconds", "n/a")
    rtf = response.headers.get("X-IndicVox-RTF", "n/a")
    gpu = response.headers.get("X-IndicVox-GPU", "unknown")
    status = (
        f"**Ready**  \n"
        f"Profile: `{profile_name}`  \n"
        f"Voice: `{voice_name}`  \n"
        f"GPU backend: `{gpu}`  \n"
        f"Audio length: `{audio_seconds}s`  \n"
        f"Generation time: `{generation_seconds}s`  \n"
        f"RTF: `{rtf}`"
    )
    return audio_path, status


def voice_preview(voice_name: str):
    """Return ``(preview_path, markdown)`` for the selected voice preset.

    ``preview_path`` is the reference clip path as a string, or None for a
    preset without a clip; ``markdown`` is the output of ``voice_markdown``.

    Raises:
        KeyError: ``voice_name`` is not a key of ``VOICE_PRESETS``.
    """
    reference_clip = VOICE_PRESETS[voice_name]["path"]
    if reference_clip is None:
        return None, voice_markdown(voice_name)
    return str(reference_clip), voice_markdown(voice_name)


def clear_prompt() -> str:
    """Return the empty string that the "Clear Prompt" button writes into the prompt box."""
    emptied = ""
    return emptied


# UI layout and event wiring. Components are created inside the context
# managers; the event listeners at the bottom connect them to the handlers
# defined above.
with gr.Blocks() as demo:
    with gr.Column(elem_id="app-shell"):
        # Static header; styled by the `#hero` and `.stat-chip` rules in CUSTOM_CSS.
        gr.HTML(
            """
            <div id="hero">
              <h1>IndicVox</h1>
              <p>Research demo for multilingual TTS across Hindi, Tamil, and code-switched prompts.</p>
              <div>
                <span class="stat-chip">HF Space frontend</span>
                <span class="stat-chip">VM-hosted H100 backend</span>
                <span class="stat-chip">Hindi + Tamil + English prompts</span>
              </div>
            </div>
            """
        )

        with gr.Row():
            # Left column: prompt, model/voice selection, tuning and actions.
            with gr.Column(scale=5):
                prompt = gr.Textbox(
                    label="Prompt",
                    value=DEFAULT_TEXT,
                    lines=5,
                    max_lines=8,
                    placeholder="Type Hindi, Tamil, or code-switched text here...",
                )

                with gr.Row():
                    profile = gr.Dropdown(
                        choices=list(PROFILES.keys()),
                        value=DEFAULT_PROFILE,
                        label="Model Profile",
                        info="Switch between the Hindi-tuned and Tamil-tuned research profiles.",
                    )
                    voice = gr.Dropdown(
                        choices=list(VOICE_PRESETS.keys()),
                        value=DEFAULT_VOICE,
                        label="Voice Preset",
                        info="Built-in research voices plus a zero-shot option.",
                    )

                with gr.Accordion("Advanced Settings", open=False):
                    with gr.Row():
                        cfg_value = gr.Slider(
                            minimum=1.0,
                            maximum=4.0,
                            value=2.0,
                            step=0.1,
                            label="CFG",
                        )
                        inference_steps = gr.Slider(
                            minimum=6,
                            maximum=16,
                            value=10,
                            step=1,
                            label="Diffusion Steps",
                        )

                with gr.Row():
                    generate_btn = gr.Button("Generate Speech", variant="primary", size="lg")
                    clear_btn = gr.Button("Clear Prompt")
                    refresh_btn = gr.Button("Refresh Backend Status")

                with gr.Row():
                    profile_info = gr.Markdown(profile_markdown(DEFAULT_PROFILE))
                    voice_info = gr.Markdown(voice_markdown(DEFAULT_VOICE))

            # Right column: backend status, synthesized audio and voice preview.
            with gr.Column(scale=4):
                # Start with a placeholder instead of calling backend_status()
                # here: that call is a blocking network request (up to 10 s)
                # executed while the module is imported, and the `demo.load`
                # listener below fills the panel on every page load anyway.
                backend_info = gr.Markdown("**Checking Backend...**  \nStatus refreshes when the page loads.")
                output_audio = gr.Audio(
                    label="Synthesized Audio",
                    autoplay=False,
                    format="wav",
                )
                generation_info = gr.Markdown("Generate a sample to see timing details.")
                voice_preview_audio = gr.Audio(
                    label="Voice Preset Preview",
                    # Reuse voice_preview so a default preset without a clip
                    # yields None rather than the literal string "None".
                    value=voice_preview(DEFAULT_VOICE)[0],
                    interactive=False,
                    autoplay=False,
                    format="wav",
                )
                gr.Markdown(
                    "Inference runs on the external VM GPU; the Space only provides the paper demo UI.",
                    elem_classes=["footnote"],
                )

        # Example rows are split into tabs by their profile column (row[1]).
        with gr.Tabs():
            with gr.Tab("Hindi + English Examples"):
                gr.Examples(
                    examples=[row for row in EXAMPLES if row[1] == "Hindi Focus"],
                    inputs=[prompt, profile, voice],
                    cache_examples=False,
                )
            with gr.Tab("Tamil + English Examples"):
                gr.Examples(
                    examples=[row for row in EXAMPLES if row[1] == "Tamil Focus"],
                    inputs=[prompt, profile, voice],
                    cache_examples=False,
                )

        gr.Markdown(
            """
            **Demo notes**

            - `Hindi Focus` maps to the Hindi-strong checkpoint from the paper experiments.
            - `Tamil Focus` maps to the Tamil + code-switch checkpoint and is the default for the demo.
            - `Text Only` skips the reference clip and runs zero-shot synthesis.
            """,
            elem_classes=["footnote"],
        )

    # Event wiring. Only the button-triggered synthesis is given an API name;
    # every other listener passes api_name=False.
    demo.load(fn=backend_status, outputs=backend_info, api_name=False)
    generate_btn.click(
        fn=synthesize,
        inputs=[prompt, profile, voice, cfg_value, inference_steps],
        outputs=[output_audio, generation_info],
        api_name="synthesize",
    )
    # Submitting the prompt box runs the same handler as the Generate button.
    prompt.submit(
        fn=synthesize,
        inputs=[prompt, profile, voice, cfg_value, inference_steps],
        outputs=[output_audio, generation_info],
        api_name=False,
    )
    profile.change(fn=profile_markdown, inputs=profile, outputs=profile_info, api_name=False)
    voice.change(fn=voice_preview, inputs=voice, outputs=[voice_preview_audio, voice_info], api_name=False)
    clear_btn.click(fn=clear_prompt, outputs=prompt, api_name=False)
    refresh_btn.click(fn=backend_status, outputs=backend_info, api_name=False)

# Configure the request queue at import time so it applies however the app is
# started: a default concurrency limit of 2 and a maximum queue size of 32.
demo.queue(default_concurrency_limit=2, max_size=32)

if __name__ == "__main__":
    # NOTE(review): passing `theme` and `css` to launch() assumes a Gradio
    # release whose launch() accepts them -- confirm against the pinned version.
    demo.launch(theme=THEME, css=CUSTOM_CSS)