#!/usr/bin/env python3
"""
JamboGPT - African Language AI Voice Agent
Multiple TTS Models for Kiswahili & Kikuyu
"""

import gradio as gr
from datetime import datetime
import torch
from transformers import pipeline
import numpy as np
from scipy.io import wavfile
import tempfile

# Choose the compute device once, at import time: the GPU when CUDA is
# available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Language configurations with multiple TTS models.
#
# Maps a language display name to its settings:
#   emoji / speakers / region -- strings shown in the language info line
#   tts_models                -- list of (model id, display label) tuples
#   default_model             -- model id selected when the language is chosen
#   keywords                  -- intent name -> trigger words checked by detect_intent
#   responses                 -- intent name -> canned reply, plus a "default" reply
LANGUAGES = {
    "Swahili": {
        "emoji": "🇰🇪",
        "speakers": "100M+",
        "region": "East Africa",
        # Each entry is (model id, display label).
        "tts_models": [
            ("Benjamin-png/swahili-mms-tts-finetuned", "🌟 JamboGPT Voice 1 (Best Quality)"),
            ("facebook/mms-tts-swh", "JamboGPT Voice 2"),
            ("multilingual-tts/F5-TTS-OpenBible-Swahili", "JamboGPT Voice 3"),
            ("stano03/jambogpt-swahili-tts-v1", "JamboGPT Voice 4 (Custom)"),
        ],
        "default_model": "Benjamin-png/swahili-mms-tts-finetuned",
        "keywords": {
            "greeting": ["habari", "jambo", "salaam", "hello", "hi"],
            "thanks": ["asante", "thank", "shukran"],
            "help": ["help", "msaada", "niweza"],
            "bye": ["kwaheri", "goodbye", "bye", "ciao"]
        },
        "responses": {
            "greeting": "Habari! Niko hapa kusaidia. Unajifunza nini leo?",
            "help": "Niweza kusaidia kwa swahili. Tafadhali niambie unajifunza nini.",
            "thanks": "Asante sana! Niko hapa kila wakati.",
            "bye": "Kwaheri! Karibu tena mwingine wakati.",
            "default": "Ndiyo, nimeelewa. Unaweza kusema zaidi?"
        }
    },
    "Kikuyu": {
        "emoji": "🇰🇪",
        "speakers": "7M",
        "region": "Kenya",
        # NOTE(review): "kin" is the ISO 639-3 code for Kinyarwanda; Kikuyu is
        # "kik". Confirm against the model card that this is the intended
        # checkpoint (here and in "default_model" below).
        "tts_models": [
            ("facebook/mms-tts-kin", "🌟 JamboGPT Voice 1 (Best Quality)"),
            ("multilingual-tts/F5-TTS-OpenBible-Kikuyu", "JamboGPT Voice 2"),
            ("multilingual-tts/VITS-OpenBible-Kikuyu", "JamboGPT Voice 3"),
        ],
        "default_model": "facebook/mms-tts-kin",
        # "mwega" is listed under both "greeting" and "thanks"; detect_intent
        # returns the first intent that matches, so "greeting" wins.
        "keywords": {
            "greeting": ["wĩ", "mwega", "hello", "hi", "salaam"],
            "thanks": ["mwega", "thank", "asante"],
            "help": ["help", "msaada"],
            "bye": ["rĩa", "goodbye", "bye"]
        },
        "responses": {
            "greeting": "Wĩ mwega! Nĩ ũndũ ũrĩkũ?",
            "help": "Nĩ mwega! Nĩkĩo kĩndũ kĩrĩa ũrĩ na kĩo?",
            "thanks": "Mwega muno! Nĩ mwega.",
            "bye": "Rĩa rĩu! Wĩ mwega!",
            "default": "Nĩguo mwega! Wĩ ũrĩa mwega?"
        }
    },
    "Yoruba": {
        "emoji": "🇳🇬",
        "speakers": "45M",
        "region": "West Africa",
        "tts_models": [
            ("facebook/mms-tts-yor", "JamboGPT Voice 1"),
        ],
        "default_model": "facebook/mms-tts-yor",
        # Keywords are written without tone/dot diacritics, so input typed
        # with them (e.g. "Pẹlẹ") does not match "pele".
        "keywords": {
            "greeting": ["pele", "hello", "hi", "bawo"],
            "thanks": ["e ku", "thank", "ope"],
            "help": ["help", "lowo"],
            "bye": ["daabo", "goodbye", "bye"]
        },
        "responses": {
            "greeting": "Pẹlẹ o! Bawo ni o se?",
            "help": "Mo le lọwọ rẹ. Kini nkan ti o nilo?",
            "thanks": "E ku ọpẹ! Ẹ kú àrọ!",
            "bye": "Ó dáàbò! Ẹ kú ọjọ́!",
            "default": "Yoo, mo gbe e. Kini nkan ti o nilo?"
        }
    },
    "Hausa": {
        "emoji": "🇳🇬",
        "speakers": "90M",
        "region": "West Africa",
        "tts_models": [
            ("facebook/mms-tts-hau", "JamboGPT Voice 1"),
        ],
        "default_model": "facebook/mms-tts-hau",
        "keywords": {
            "greeting": ["sannu", "hello", "hi", "ina"],
            "thanks": ["nagode", "thank"],
            "help": ["taimaka", "help"],
            "bye": ["sai", "goodbye", "bye"]
        },
        "responses": {
            "greeting": "Sannu! Ina kwana?",
            "help": "Ina iya taimakawa ka. Me na gida!",
            "thanks": "Nagode! Na gida!",
            "bye": "Sai anjima! Jiya!",
            "default": "I na gida. Me na gida?"
        }
    },
    "Amharic": {
        "emoji": "🇪🇹",
        "speakers": "32M",
        "region": "Horn of Africa",
        "tts_models": [
            ("facebook/mms-tts-amh", "JamboGPT Voice 1"),
        ],
        "default_model": "facebook/mms-tts-amh",
        "keywords": {
            "greeting": ["ሰላም", "hello", "hi", "ሳላም"],
            "thanks": ["አመሰግናለሁ", "thank"],
            "help": ["ሚዛን", "help"],
            "bye": ["ደህና", "goodbye", "bye"]
        },
        "responses": {
            "greeting": "ሰላም! ደህና! ምን ያስፈልግሃል?",
            "help": "ሚዛን! እንታይ ትደልዩ?",
            "thanks": "አመሰግናለሁ! ደህና!",
            "bye": "ሰላም! ሚዛን!",
            "default": "ሙሊ። እንታይ ተወሳኺ?"
        }
    },
    "Fon": {
        "emoji": "🇧🇯",
        "speakers": "2M",
        "region": "West Africa",
        "tts_models": [
            ("facebook/mms-tts-fon", "JamboGPT Voice 1"),
        ],
        "default_model": "facebook/mms-tts-fon",
        # NOTE(review): the keywords and replies below are French, while the
        # voice model id refers to Fon -- confirm this pairing is intended.
        "keywords": {
            "greeting": ["bonjour", "hello", "hi"],
            "thanks": ["merci", "thank"],
            "help": ["aide", "help"],
            "bye": ["au revoir", "goodbye", "bye"]
        },
        "responses": {
            "greeting": "Bonjour! Comment allez-vous?",
            "help": "Je peux vous aider. Qu'est-ce que vous voulez?",
            "thanks": "Merci beaucoup! De rien!",
            "bye": "Au revoir! À bientôt!",
            "default": "Oui, je comprends. Quoi d'autre?"
        }
    },
    "Oromo": {
        "emoji": "🇪🇹",
        "speakers": "40M",
        "region": "East Africa",
        "tts_models": [
            ("facebook/mms-tts-orm", "JamboGPT Voice 1"),
        ],
        "default_model": "facebook/mms-tts-orm",
        "keywords": {
            "greeting": ["salaam", "hello", "hi"],
            "thanks": ["galataa", "thank"],
            "help": ["gargaarsa", "help"],
            "bye": ["nagaa", "goodbye", "bye"]
        },
        "responses": {
            "greeting": "Salaam! Akkam jirtaa?",
            "help": "Gargaarsa nan geedaru. Maal barbaadda?",
            "thanks": "Galataa! Nagaa!",
            "bye": "Nagaa! Haa jiraatin!",
            "default": "Eeyyee, hubadha. Maal biraa?"
        }
    },
    "Somali": {
        "emoji": "🇸🇴",
        "speakers": "20M",
        "region": "East Africa",
        "tts_models": [
            ("facebook/mms-tts-som", "JamboGPT Voice 1"),
        ],
        "default_model": "facebook/mms-tts-som",
        "keywords": {
            "greeting": ["salaam", "hello", "hi"],
            "thanks": ["mahadsanid", "thank"],
            "help": ["caawi", "help"],
            "bye": ["nabad", "goodbye", "bye"]
        },
        "responses": {
            "greeting": "Salaam! Sidee tahay?",
            "help": "Waan kaa caawin karaa. Maxaa baahan?",
            "thanks": "Mahadsanid! Nabad!",
            "bye": "Nabad! Halkaa ku joog!",
            "default": "Hah, waan fahmay. Maxaa kale?"
        }
    },
    "Tigrinya": {
        "emoji": "🇪🇷",
        "speakers": "7M",
        "region": "Horn of Africa",
        "tts_models": [
            ("facebook/mms-tts-tir", "JamboGPT Voice 1"),
        ],
        "default_model": "facebook/mms-tts-tir",
        # The keyword lists and several replies here are identical to the
        # "Amharic" entry above.
        "keywords": {
            "greeting": ["ሰላም", "hello", "hi"],
            "thanks": ["አመሰግናለሁ", "thank"],
            "help": ["ሚዛን", "help"],
            "bye": ["ደህና", "goodbye", "bye"]
        },
        "responses": {
            "greeting": "ሰላም! ዴሌ ኢካ?",
            "help": "ሚዛን! እንታይ ትደልዩ?",
            "thanks": "አመሰግናለሁ! ደህና!",
            "bye": "ሰላም! ሚዛን!",
            "default": "ሙሊ። እንታይ ተወሳኺ?"
        }
    },
    "English": {
        "emoji": "🌍",
        "speakers": "1.5B",
        "region": "Global",
        "tts_models": [
            ("facebook/mms-tts-eng", "JamboGPT Voice 1"),
        ],
        "default_model": "facebook/mms-tts-eng",
        "keywords": {
            "greeting": ["hello", "hi", "hey", "greetings"],
            "thanks": ["thank", "thanks", "appreciate"],
            "help": ["help", "assist"],
            "bye": ["bye", "goodbye", "farewell"]
        },
        "responses": {
            "greeting": "Hello! How can I help you today?",
            "help": "I can help you with English. What would you like to know?",
            "thanks": "Thank you! Happy to help!",
            "bye": "Goodbye! See you later!",
            "default": "I understand. What else can I help you with?"
        }
    }
}

# Module-level mutable state, so it is shared by every call made in this process.
# One dict per exchange; appended to by process_text_input.
conversation_history = []
# Model id -> loaded text-to-speech pipeline; filled by load_tts_model.
model_cache = {}

def load_tts_model(model_id):
    """Return a text-to-speech pipeline for ``model_id``, loading it on first use.

    A pipeline that loads successfully is stored in ``model_cache`` and reused
    by later calls. A failed load is printed rather than raised and is not
    cached, so a later call with the same id tries again.

    Args:
        model_id: Model identifier handed to ``transformers.pipeline``.

    Returns:
        The loaded pipeline, or ``None`` when loading raised an exception.
    """
    # Fast path: reuse an already-loaded pipeline.
    try:
        return model_cache[model_id]
    except KeyError:
        pass

    try:
        print(f"Loading TTS model: {model_id}")
        # "cuda" is passed through as-is; any other device setting becomes -1.
        target_device = "cuda" if device == "cuda" else -1
        tts_pipeline = pipeline(
            "text-to-speech",
            model=model_id,
            device=target_device,
        )
        model_cache[model_id] = tts_pipeline
    except Exception as e:
        print(f"Error loading model {model_id}: {e}")
        return None

    return tts_pipeline

def _keyword_starts_a_word(text, keyword):
    """Return True if ``keyword`` occurs in ``text`` at the start of a word.

    An occurrence counts when it sits at index 0 or the character before it is
    not alphanumeric. Stems still match their longer forms ("thank" matches
    "thanks") but a keyword buried inside another word does not ("hi" no
    longer matches "this").
    """
    position = text.find(keyword)
    while position != -1:
        if position == 0 or not text[position - 1].isalnum():
            return True
        position = text.find(keyword, position + 1)
    return False


def detect_intent(text, language):
    """Detect user intent from text.

    The lower-cased text is checked against the ``keywords`` configured for
    ``language`` in ``LANGUAGES``. Intents are tried in the order they are
    configured and the first one with a matching keyword wins.

    A keyword matches only where it begins a word, so short keywords such as
    "hi" do not fire inside unrelated words like "this" or "something". It
    may still be a prefix of a longer word ("hi" matches "history").

    Args:
        text: The user's message. ``None`` or an empty string yields "default".
        language: Key into ``LANGUAGES``; an unknown language has no keywords.

    Returns:
        The matched intent name, or "default" when nothing matches.
    """
    if not text:
        return "default"

    text_lower = text.lower()
    keywords = LANGUAGES.get(language, {}).get("keywords", {})

    for intent, words in keywords.items():
        if any(_keyword_starts_a_word(text_lower, word.lower()) for word in words):
            return intent

    return "default"

def generate_response(text, language):
    """Pick the canned reply for ``text`` in the given language.

    The intent is obtained from ``detect_intent`` and looked up in the
    language's ``responses`` table. A missing intent falls back to the table's
    "default" entry, and to "I understand." when that is missing as well.

    Args:
        text: The user's message.
        language: Key into ``LANGUAGES``.

    Returns:
        The reply string. If anything raises, the error is printed and a fixed
        English fallback sentence is returned instead.
    """
    try:
        reply_table = LANGUAGES.get(language, {}).get("responses", {})
        intent = detect_intent(text, language)

        if intent in reply_table:
            return reply_table[intent]
        return reply_table.get("default", "I understand.")
    except Exception as e:
        print(f"Error generating response: {e}")
        return "I understand. Can you say more?"

def synthesize_speech(text, language, model_name):
    """Convert text to speech using the selected model and save it as a WAV file.

    Args:
        text: Text to speak. ``None``, empty or whitespace-only text is skipped.
        language: Accepted for interface compatibility; not used here.
        model_name: Model id passed to ``load_tts_model``.

    Returns:
        Path to a 16-bit PCM ``.wav`` file, or ``None`` when there is nothing
        to speak, the model cannot be loaded, or synthesis fails (the error is
        printed). The file is created with ``delete=False`` and is not removed
        by this function.
    """
    if not text or not text.strip():
        return None

    try:
        synthesizer = load_tts_model(model_name)
        if synthesizer is None:
            return None

        print(f"Generating speech with {model_name}: {text[:50]}...")
        speech = synthesizer(text)

        audio_array = np.array(speech["audio"]).flatten()
        sample_rate = speech["sampling_rate"]

        # Clip before scaling: a sample outside [-1, 1] would otherwise wrap
        # around when cast to int16.
        pcm = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)

        # Reserve a unique path, then close the handle before writing so the
        # file is never open twice at the same time.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            temp_path = f.name
        wavfile.write(temp_path, sample_rate, pcm)

        return temp_path
    except Exception as e:
        print(f"Error synthesizing: {e}")
        return None

def process_text_input(text, language, tts_model=None):
    """Process text input: generate response -> synthesize -> record history.

    Args:
        text: The user's message. ``None``, empty or whitespace-only input is
            rejected with a prompt to enter text.
        language: Key into ``LANGUAGES``.
        tts_model: Model id of the voice to use. When omitted or empty, the
            language's configured ``default_model`` is used; if there is none,
            the reply is returned without audio.

    Returns:
        A ``(audio_path, response_text, history_text)`` tuple. ``audio_path``
        is ``None`` when no audio was produced. ``history_text`` renders the
        five most recent exchanges. On an unexpected error the tuple is
        ``(None, "Error: ...", "")``.
    """
    try:
        if not text or not text.strip():
            return None, "Please enter some text!", ""

        response_text = generate_response(text, language)
        if response_text is None:
            return None, "Error generating response", ""

        # Never pass an empty model id on to the loader; fall back to the
        # voice configured for the language.
        if not tts_model:
            tts_model = LANGUAGES.get(language, {}).get("default_model")

        audio_output = None
        if tts_model:
            audio_output = synthesize_speech(response_text, language, tts_model)

        # NOTE(review): conversation_history is a module-level list, so every
        # caller in this process shares it and it is never trimmed.
        conversation_history.append({
            "user": text,
            "agent": response_text,
            "language": language,
            "model": tts_model,
            "timestamp": datetime.now().strftime("%H:%M:%S")
        })

        history_text = "".join(
            f"[{msg['timestamp']}] {msg['language']}\n"
            f"You: {msg['user']}\n"
            f"Agent: {msg['agent']}\n\n"
            for msg in conversation_history[-5:]
        )

        return audio_output, response_text, history_text
    except Exception as e:
        print(f"Error processing: {e}")
        return None, f"Error: {str(e)}", ""

def _language_summary(language, lang_data):
    """Build the one-line Markdown summary shown under the language selector."""
    return (
        f"{lang_data['emoji']} **{language}** • "
        f"{lang_data['speakers']} speakers • {lang_data['region']}"
    )


def _voice_choices(lang_data):
    """Return a language's voices as ``(label, model_id)`` dropdown choices.

    ``LANGUAGES`` stores each voice as ``(model_id, label)``, whereas a Gradio
    dropdown takes ``(label, value)`` tuples, so the pairs are swapped here.
    """
    return [(label, model_id) for model_id, label in lang_data.get("tts_models", [])]


def _default_voice(lang_data):
    """Return the language's default model id, else its first voice, else None."""
    default_model = lang_data.get("default_model")
    if default_model:
        return default_model
    models = lang_data.get("tts_models", [])
    return models[0][0] if models else None


def create_interface():
    """Create the voice agent interface.

    Builds a Gradio ``Blocks`` app with a language selector, a voice-model
    selector that follows the selected language, a text box, and outputs for
    the agent's reply, its audio and the recent conversation history.

    Returns:
        The assembled ``gr.Blocks`` object (not yet launched).
    """
    initial_language = "Swahili"
    initial_data = LANGUAGES[initial_language]

    with gr.Blocks(
        title="JamboGPT - African Language AI Voice Agent",
        theme=gr.themes.Soft(primary_hue="purple")
    ) as demo:

        gr.Markdown("""
        # 🌍 JamboGPT - African Language AI Voice Agent
        
        **Chat with AI in 10 African languages with multiple voice options**
        
        Swahili • Kikuyu • Yoruba • Hausa • Amharic • Fon • Oromo • Somali • Tigrinya • English
        """)

        with gr.Group():
            # Language selector
            language_choice = gr.Dropdown(
                choices=list(LANGUAGES.keys()),
                value=initial_language,
                label="Select Language",
                interactive=True
            )

            # Language info
            language_info = gr.Markdown(
                _language_summary(initial_language, initial_data)
            )

            # TTS model selector; its initial contents come from LANGUAGES so
            # they cannot drift from what a language switch would show.
            tts_model_choice = gr.Dropdown(
                choices=_voice_choices(initial_data),
                value=_default_voice(initial_data),
                label="Select Voice Model",
                interactive=True
            )

            def update_language_info(language):
                """Refresh the info line and voice dropdown for ``language``."""
                lang_data = LANGUAGES.get(language)
                if lang_data is None:
                    # Unknown language: clear both the choices and the value.
                    return "", gr.Dropdown(choices=[], value=None)

                return (
                    _language_summary(language, lang_data),
                    gr.Dropdown(
                        choices=_voice_choices(lang_data),
                        value=_default_voice(lang_data),
                    ),
                )

            language_choice.change(
                update_language_info,
                inputs=language_choice,
                outputs=[language_info, tts_model_choice]
            )

            # Text input
            text_input = gr.Textbox(
                label="Type your message",
                placeholder="Type in your selected language...",
                lines=3,
                interactive=True
            )

            # Process button
            process_btn = gr.Button(
                "🎤 Generate Response",
                variant="primary",
                size="lg"
            )

        # Output section
        with gr.Group():
            agent_response = gr.Textbox(
                label="🤖 Agent Response",
                interactive=False,
                placeholder="The agent's response will appear here"
            )

            audio_output = gr.Audio(
                label="🔊 Agent Voice",
                type="filepath",
                interactive=False
            )

            history_display = gr.Textbox(
                label="📝 Conversation History",
                interactive=False,
                lines=4,
                placeholder="Your conversation history will appear here"
            )

        # Connect process button
        process_btn.click(
            fn=process_text_input,
            inputs=[text_input, language_choice, tts_model_choice],
            outputs=[audio_output, agent_response, history_display]
        )

        # Examples only pre-fill the inputs; the user then presses the button.
        # No fn/outputs are given: examples are not cached, and the two inputs
        # here do not line up with process_text_input's three inputs and three
        # outputs.
        gr.Examples(
            examples=[
                ["Habari, karibu sana!", "Swahili"],
                ["Wĩ mwega, karibu!", "Kikuyu"],
                ["Pẹlẹ o, bawo ni o se?", "Yoruba"],
                ["Hello, how are you?", "English"],
            ],
            inputs=[text_input, language_choice],
            cache_examples=False,
        )

        gr.Markdown("""
        ---
        **JamboGPT** - Making AI Accessible to African Languages
        
        🔗 [GitHub](https://github.com/stano03/jambogpt) | 📊 [Dataset](https://huggingface.co/datasets/stano03/jambogpt-real-dataset) | 🤖 [Models](https://huggingface.co/stano03)
        """)

    return demo

if __name__ == "__main__":
    # Script entry point: build the UI, print a ready banner, start the server.
    print("🚀 Creating JamboGPT Voice Agent Interface...")
    demo = create_interface()

    banner = "=" * 50
    print(banner)
    print("✅ JamboGPT Voice Agent is ready!")
    print(banner)

    # Listen on all interfaces, port 7860, with no public share link; errors
    # are shown in the UI.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_options)