"""
WitFoo Training TTS — HuggingFace Space
Multilingual text-to-speech using Microsoft Edge TTS neural voices.
Supports: en, es, fr, de, ja, ar
"""

import gradio as gr
import asyncio
import tempfile
import os
import edge_tts

# One row per supported language: (code, display name, male voice, female voice).
# Row order is significant: it fixes the key order of every mapping derived below.
_VOICE_TABLE = (
    ("en", "English", "en-US-GuyNeural", "en-US-JennyNeural"),
    ("es", "Spanish", "es-MX-JorgeNeural", "es-MX-DaliaNeural"),
    ("fr", "French", "fr-FR-HenriNeural", "fr-FR-DeniseNeural"),
    ("de", "German", "de-DE-ConradNeural", "de-DE-KatjaNeural"),
    ("ja", "Japanese", "ja-JP-KeitaNeural", "ja-JP-NanamiNeural"),
    ("ar", "Arabic", "ar-SA-HamedNeural", "ar-SA-ZariyahNeural"),
)

# Male neural voice per language code
VOICES = {code: male for code, _name, male, _female in _VOICE_TABLE}

# Female neural voice per language code
VOICES_FEMALE = {code: female for code, _name, _male, female in _VOICE_TABLE}

# Language codes in display order, and their human-readable names
SUPPORTED_LANGUAGES = [code for code, *_rest in _VOICE_TABLE]
LANG_NAMES = {code: name for code, name, *_voices in _VOICE_TABLE}


async def _generate_async(text: str, voice: str, output_path: str) -> None:
    """Synthesize *text* with the given Edge TTS voice and save it to *output_path*.

    The speaking rate is fixed at "-5%" for every request.
    """
    await edge_tts.Communicate(text, voice, rate="-5%").save(output_path)


def _run_async(coro):
    """Run a coroutine to completion from synchronous code and return its result.

    Works both when no event loop is running in the calling thread and when
    one already is. Any exception raised by the coroutine propagates unchanged.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever the coroutine returns.
    """
    # Only the loop probe lives inside the try block. If the whole body were
    # guarded, a RuntimeError raised by the coroutine itself would be mistaken
    # for "no running loop" and trigger a second, invalid asyncio.run(coro).
    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run can own a new one.
        return asyncio.run(coro)

    # A loop is already running here and asyncio.run refuses to nest, so drive
    # the coroutine on its own loop in a worker thread and block on the result.
    import concurrent.futures
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(asyncio.run, coro).result()


def generate_speech(text: str, language: str, voice_type: str) -> str:
    """Generate speech for *text* in the specified language.

    Args:
        text: Narration text to synthesize; must contain non-whitespace.
        language: Language code; must be one of SUPPORTED_LANGUAGES.
        voice_type: "Female" selects VOICES_FEMALE; any other value selects VOICES.

    Returns:
        Path to the generated MP3 file in the system temp directory.

    Raises:
        gr.Error: If the text is empty or the language is not supported.
    """
    # `not text` also covers None, which would otherwise fail on .strip().
    if not text or not text.strip():
        raise gr.Error("Text cannot be empty")
    if language not in SUPPORTED_LANGUAGES:
        raise gr.Error(f"Unsupported language: {language}")

    voices = VOICES_FEMALE if voice_type == "Female" else VOICES
    voice = voices[language]

    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name, leaving a window in which another process can claim the path.
    fd, output_path = tempfile.mkstemp(suffix=".mp3")
    os.close(fd)  # only the path is needed; the TTS writer reopens the file
    try:
        _run_async(_generate_async(text, voice, output_path))
    except BaseException:
        # Do not leave an empty or partial placeholder behind on failure.
        try:
            os.remove(output_path)
        except OSError:
            pass
        raise

    return output_path


def batch_generate(texts: str, language: str, voice_type: str):
    """Generate speech for multiple segments (||| separated).

    Args:
        texts: One or more text segments separated by "|||"; blank segments
            are ignored.
        language: Language code. An unknown code falls back to the English
            voice of the selected voice type.
        voice_type: "Female" selects VOICES_FEMALE; any other value selects VOICES.

    Returns:
        List of paths to the generated MP3 files, one per segment, in order.

    Raises:
        gr.Error: If no non-empty segment is found.
    """
    # `texts or ""` tolerates None, which would otherwise fail on .split().
    segments = [t.strip() for t in (texts or "").split("|||") if t.strip()]
    if not segments:
        raise gr.Error("No text segments found. Separate with |||")

    voices = VOICES_FEMALE if voice_type == "Female" else VOICES
    # Fall back within the selected table so a "Female" request never
    # silently switches to the male English voice.
    voice = voices.get(language, voices["en"])

    results = []
    try:
        for i, segment in enumerate(segments, start=1):
            print(f"[{i}/{len(segments)}] {segment[:60]}...")
            # mkstemp creates the file atomically; the deprecated mktemp only
            # returns a name that another process could claim first.
            fd, output_path = tempfile.mkstemp(suffix=f"_seg{i:03d}.mp3")
            os.close(fd)  # only the path is needed; the TTS writer reopens it
            # Record the path before generating so cleanup covers this file too.
            results.append(output_path)
            _run_async(_generate_async(segment, voice, output_path))
    except BaseException:
        # A failed batch returns nothing, so remove every file created so far.
        for path in results:
            try:
                os.remove(path)
            except OSError:
                pass
        raise

    return results


# Gradio interface: a "Single Generation" tab and a "Batch Generation" tab,
# each with its own language and voice controls, followed by a footer.
# Components are laid out in the order they are created inside each context.
with gr.Blocks(title="WitFoo Training TTS", theme=gr.themes.Base()) as demo:
    gr.Markdown("# WitFoo Training TTS")
    gr.Markdown("Generate multilingual voiceover for training courses using neural TTS voices.")

    # Tab 1: one text box in, one audio clip out.
    with gr.Tab("Single Generation"):
        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column():
                text_input = gr.Textbox(label="Narration Text", lines=8,
                    placeholder="Enter narration text to convert to speech...")
                with gr.Row():
                    lang_input = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="en", label="Language")
                    voice_input = gr.Radio(choices=["Male", "Female"], value="Male", label="Voice")
                generate_btn = gr.Button("Generate Speech", variant="primary", size="lg")
            # Right column: audio component fed by the file path generate_speech returns.
            with gr.Column():
                audio_output = gr.Audio(label="Generated Speech", type="filepath")

        # Input order matches generate_speech(text, language, voice_type).
        generate_btn.click(fn=generate_speech, inputs=[text_input, lang_input, voice_input], outputs=audio_output)

    # Tab 2: several "|||"-separated segments in, one file per segment out.
    with gr.Tab("Batch Generation"):
        gr.Markdown("Separate text segments with `|||` for batch processing.")
        batch_text = gr.Textbox(label="Texts (||| separated)", lines=12,
            placeholder="First paragraph...\n|||\nSecond paragraph...\n|||\nThird paragraph...")
        with gr.Row():
            batch_lang = gr.Dropdown(choices=SUPPORTED_LANGUAGES, value="en", label="Language")
            batch_voice = gr.Radio(choices=["Male", "Female"], value="Male", label="Voice")
        batch_btn = gr.Button("Generate All Segments", variant="primary", size="lg")
        batch_output = gr.File(label="Generated Audio Files", file_count="multiple")

        # Input order matches batch_generate(texts, language, voice_type).
        batch_btn.click(fn=batch_generate, inputs=[batch_text, batch_lang, batch_voice], outputs=batch_output)

    # Footer: lists the VOICES entry for each language; the VOICES_FEMALE
    # alternatives are not shown here.
    gr.Markdown("---")
    gr.Markdown("**Voices:** " + " | ".join([f"{LANG_NAMES[k]}: {v}" for k, v in VOICES.items()]))
    gr.Markdown("*WitFoo Training Program — 9 certification courses, 6 languages, 178 lessons*")

# Start the server only when run as a script: all interfaces, port 7860.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)