Spaces:

stano03
/

jambogpt

Sleeping

App Files Files Community

JamboGPT Bot committed 4 days ago

Commit

504b4e1

1 Parent(s): dd6ced7

Add free Hugging Face TTS models for voice generation

Browse files

Files changed (1) hide show

app.py +117 -31

app.py CHANGED Viewed

@@ -1,18 +1,27 @@
 #!/usr/bin/env python3
 """
 JamboGPT - African Language AI Voice Agent
-Simple, Lightweight Version for Hugging Face Spaces
 """
 import gradio as gr
 from datetime import datetime
-# Language configurations
 LANGUAGES = {
     "Swahili": {
         "emoji": "🇰🇪",
         "speakers": "100M+",
         "region": "East Africa",
         "keywords": {
             "greeting": ["habari", "jambo", "salaam", "hello", "hi"],
             "thanks": ["asante", "thank", "shukran"],
@@ -31,6 +40,7 @@ LANGUAGES = {
         "emoji": "🇰🇪",
         "speakers": "7M",
         "region": "Kenya",
         "keywords": {
             "greeting": ["wĩ", "mwega", "hello", "hi", "salaam"],
             "thanks": ["mwega", "thank", "asante"],
@@ -49,6 +59,7 @@ LANGUAGES = {
         "emoji": "🇳🇬",
         "speakers": "45M",
         "region": "West Africa",
         "keywords": {
             "greeting": ["pele", "hello", "hi", "bawo"],
             "thanks": ["e ku", "thank", "ope"],
@@ -67,6 +78,7 @@ LANGUAGES = {
         "emoji": "🇳🇬",
         "speakers": "90M",
         "region": "West Africa",
         "keywords": {
             "greeting": ["sannu", "hello", "hi", "ina"],
             "thanks": ["nagode", "thank"],
@@ -85,6 +97,7 @@ LANGUAGES = {
         "emoji": "🇪🇹",
         "speakers": "32M",
         "region": "Horn of Africa",
         "keywords": {
             "greeting": ["ሰላም", "hello", "hi", "ሳላም"],
             "thanks": ["አመሰግናለሁ", "thank"],
@@ -103,6 +116,7 @@ LANGUAGES = {
         "emoji": "🇧🇯",
         "speakers": "2M",
         "region": "West Africa",
         "keywords": {
             "greeting": ["bonjour", "hello", "hi"],
             "thanks": ["merci", "thank"],
@@ -121,6 +135,7 @@ LANGUAGES = {
         "emoji": "🇪🇹",
         "speakers": "40M",
         "region": "East Africa",
         "keywords": {
             "greeting": ["salaam", "hello", "hi"],
             "thanks": ["galataa", "thank"],
@@ -139,6 +154,7 @@ LANGUAGES = {
         "emoji": "🇸🇴",
         "speakers": "20M",
         "region": "East Africa",
         "keywords": {
             "greeting": ["salaam", "hello", "hi"],
             "thanks": ["mahadsanid", "thank"],
@@ -157,6 +173,7 @@ LANGUAGES = {
         "emoji": "🇪🇷",
         "speakers": "7M",
         "region": "Horn of Africa",
         "keywords": {
             "greeting": ["ሰላም", "hello", "hi"],
             "thanks": ["አመሰግናለሁ", "thank"],
@@ -175,6 +192,7 @@ LANGUAGES = {
         "emoji": "🌍",
         "speakers": "1.5B",
         "region": "Global",
         "keywords": {
             "greeting": ["hello", "hi", "hey", "greetings"],
             "thanks": ["thank", "thanks", "appreciate"],
@@ -192,6 +210,31 @@ LANGUAGES = {
 }
 conversation_history = []
 def detect_intent(text, language):
     """Detect user intent from text."""
@@ -216,31 +259,83 @@ def generate_response(text, language):
         intent = detect_intent(text, language)
         response = responses.get(intent, responses.get("default", "I understand."))
         # Add to history
         conversation_history.append({
             "user": text,
-            "agent": response,
             "language": language,
             "timestamp": datetime.now().strftime("%H:%M:%S")
         })
-        return response
     except Exception as e:
-        print(f"Error generating response: {e}")
-        return "I understand. Can you say more?"
 def create_interface():
     """Create the voice agent interface."""
     with gr.Blocks(
-        title="JamboGPT - African Language AI",
         theme=gr.themes.Soft(primary_hue="purple")
     ) as demo:
         gr.Markdown("""
-        # 🌍 JamboGPT - African Language AI
-        **Chat with AI in 10 African languages**
         Swahili • Kikuyu • Yoruba • Hausa • Amharic • Fon • Oromo • Somali • Tigrinya • English
         """)
@@ -290,6 +385,12 @@ def create_interface():
                 placeholder="The agent's response will appear here"
             )
             # Conversation history
             history_display = gr.Textbox(
                 label="📝 Conversation History",
@@ -299,25 +400,10 @@ def create_interface():
             )
         # Connect process button
-        def process_input(text, language):
-            if not text:
-                return "Please enter some text!", ""
-            response = generate_response(text, language)
-            # Format history
-            history_text = ""
-            for msg in conversation_history[-5:]:  # Show last 5 messages
-                history_text += f"[{msg['timestamp']}] {msg['language']}\n"
-                history_text += f"You: {msg['user']}\n"
-                history_text += f"Agent: {msg['agent']}\n\n"
-            return response, history_text
         process_btn.click(
-            fn=process_input,
             inputs=[text_input, language_choice],
-            outputs=[agent_response, history_display]
         )
         # Examples
@@ -329,8 +415,8 @@ def create_interface():
                 ["Hello, how are you?", "English"],
             ],
             inputs=[text_input, language_choice],
-            outputs=[agent_response],
-            fn=process_input,
             cache_examples=False,
         )
@@ -338,17 +424,17 @@ def create_interface():
         ---
         **JamboGPT** - Making AI Accessible to African Languages
-        🔗 [GitHub](https://github.com/stano03/jambogpt) | 📊 [Dataset](https://huggingface.co/datasets/stano03/jambogpt-real-dataset)
         """)
     return demo
 if __name__ == "__main__":
-    print("🚀 Creating JamboGPT Interface...")
     demo = create_interface()
     print("=" * 50)
-    print("✅ JamboGPT is ready!")
     print("=" * 50)
     demo.launch(

 #!/usr/bin/env python3
 """
 JamboGPT - African Language AI Voice Agent
+Using Free Hugging Face TTS Models
 """
 import gradio as gr
 from datetime import datetime
+import torch
+from transformers import pipeline
+import numpy as np
+from scipy.io import wavfile
+import tempfile
+# Set device
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Language configurations with free HF TTS models
 LANGUAGES = {
     "Swahili": {
         "emoji": "🇰🇪",
         "speakers": "100M+",
         "region": "East Africa",
+        "tts_model": "facebook/mms-tts-swh",
         "keywords": {
             "greeting": ["habari", "jambo", "salaam", "hello", "hi"],
             "thanks": ["asante", "thank", "shukran"],
         "emoji": "🇰🇪",
         "speakers": "7M",
         "region": "Kenya",
+        "tts_model": "facebook/mms-tts-kin",
         "keywords": {
             "greeting": ["wĩ", "mwega", "hello", "hi", "salaam"],
             "thanks": ["mwega", "thank", "asante"],
         "emoji": "🇳🇬",
         "speakers": "45M",
         "region": "West Africa",
+        "tts_model": "facebook/mms-tts-yor",
         "keywords": {
             "greeting": ["pele", "hello", "hi", "bawo"],
             "thanks": ["e ku", "thank", "ope"],
         "emoji": "🇳🇬",
         "speakers": "90M",
         "region": "West Africa",
+        "tts_model": "facebook/mms-tts-hau",
         "keywords": {
             "greeting": ["sannu", "hello", "hi", "ina"],
             "thanks": ["nagode", "thank"],
         "emoji": "🇪🇹",
         "speakers": "32M",
         "region": "Horn of Africa",
+        "tts_model": "facebook/mms-tts-amh",
         "keywords": {
             "greeting": ["ሰላም", "hello", "hi", "ሳላም"],
             "thanks": ["አመሰግናለሁ", "thank"],
         "emoji": "🇧🇯",
         "speakers": "2M",
         "region": "West Africa",
+        "tts_model": "facebook/mms-tts-fon",
         "keywords": {
             "greeting": ["bonjour", "hello", "hi"],
             "thanks": ["merci", "thank"],
         "emoji": "🇪🇹",
         "speakers": "40M",
         "region": "East Africa",
+        "tts_model": "facebook/mms-tts-orm",
         "keywords": {
             "greeting": ["salaam", "hello", "hi"],
             "thanks": ["galataa", "thank"],
         "emoji": "🇸🇴",
         "speakers": "20M",
         "region": "East Africa",
+        "tts_model": "facebook/mms-tts-som",
         "keywords": {
             "greeting": ["salaam", "hello", "hi"],
             "thanks": ["mahadsanid", "thank"],
         "emoji": "🇪🇷",
         "speakers": "7M",
         "region": "Horn of Africa",
+        "tts_model": "facebook/mms-tts-tir",
         "keywords": {
             "greeting": ["ሰላም", "hello", "hi"],
             "thanks": ["አመሰግናለሁ", "thank"],
         "emoji": "🌍",
         "speakers": "1.5B",
         "region": "Global",
+        "tts_model": "facebook/mms-tts-eng",
         "keywords": {
             "greeting": ["hello", "hi", "hey", "greetings"],
             "thanks": ["thank", "thanks", "appreciate"],
 }
 conversation_history = []
+model_cache = {}
+def load_tts_model(language_name):
+    """Return the text-to-speech pipeline configured for ``language_name``.
+
+    Pipelines are memoised in ``model_cache`` under their model id, so a
+    given model is only constructed once per process.
+
+    Args:
+        language_name: A key of ``LANGUAGES`` (e.g. ``"Swahili"``).
+
+    Returns:
+        The cached or newly created pipeline, or ``None`` when the language
+        is not configured or the pipeline could not be created.
+    """
+    if language_name not in LANGUAGES:
+        return None
+    model_id = LANGUAGES[language_name]["tts_model"]
+    try:
+        return model_cache[model_id]
+    except KeyError:
+        pass  # Not loaded yet; build it below.
+    try:
+        print(f"Loading TTS model for {language_name}: {model_id}")
+        # CUDA is passed through by name; the non-CUDA case passes -1 instead.
+        target = -1 if device != "cuda" else device
+        tts = pipeline("text-to-speech", model=model_id, device=target)
+        model_cache[model_id] = tts
+        return tts
+    except Exception as e:
+        print(f"Error loading model {model_id}: {e}")
+        return None
 def detect_intent(text, language):
     """Detect user intent from text."""
         intent = detect_intent(text, language)
         response = responses.get(intent, responses.get("default", "I understand."))
+        return response
+    except Exception as e:
+        print(f"Error generating response: {e}")
+        return "I understand. Can you say more?"
+def synthesize_speech(text, language):
+    """Convert ``text`` to speech and return the path of a WAV file.
+
+    Args:
+        text: The text to speak. Empty or whitespace-only text is skipped.
+        language: Language name used to select the model via
+            ``load_tts_model``.
+
+    Returns:
+        Path to a temporary 16-bit WAV file, or ``None`` if there is nothing
+        to speak, no model is available, or synthesis fails. The file is
+        created with ``delete=False`` and is not removed by this function.
+    """
+    if not text or not text.strip():
+        return None
+    try:
+        synthesizer = load_tts_model(language)
+        if synthesizer is None:
+            return None
+        print(f"Generating speech for: {text[:50]}...")
+        speech = synthesizer(text)
+        audio_array = np.asarray(speech["audio"]).flatten()
+        sample_rate = speech["sampling_rate"]
+        # Clip before scaling: a sample outside [-1, 1] would otherwise wrap
+        # around when cast to int16 and come out as a loud click.
+        # Assumes the pipeline yields float samples nominally in [-1, 1].
+        pcm = (np.clip(audio_array, -1.0, 1.0) * 32767).astype(np.int16)
+        # Reserve the path, then close the handle before writing by name;
+        # reopening a still-open NamedTemporaryFile fails on Windows.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            temp_path = f.name
+        wavfile.write(temp_path, sample_rate, pcm)
+        return temp_path
+    except Exception as e:
+        print(f"Error synthesizing: {e}")
+        return None
+def process_text_input(text, language):
+    """Answer the user's text and voice the answer.
+
+    Generates a reply with ``generate_response``, synthesizes it with
+    ``synthesize_speech``, records the exchange in ``conversation_history``
+    and renders the five most recent exchanges as text.
+
+    Args:
+        text: The user's message.
+        language: The selected language name.
+
+    Returns:
+        A 3-tuple ``(audio_path, response_text, history_text)``.
+        ``audio_path`` is ``None`` when no speech could be produced. On
+        empty input or on error, the second element carries the message to
+        show and the history text is empty.
+    """
+    try:
+        # Treat whitespace-only input as empty, as synthesize_speech does.
+        if not text or not text.strip():
+            return None, "Please enter some text!", ""
+        # Generate response
+        response_text = generate_response(text, language)
+        if response_text is None:
+            return None, "Error generating response", ""
+        # Synthesize response (None means the reply is text-only)
+        audio_output = synthesize_speech(response_text, language)
         # Add to history
         conversation_history.append({
             "user": text,
+            "agent": response_text,
             "language": language,
             "timestamp": datetime.now().strftime("%H:%M:%S")
         })
+        # Format the last five exchanges in one pass
+        history_text = "".join(
+            f"[{msg['timestamp']}] {msg['language']}\n"
+            f"You: {msg['user']}\n"
+            f"Agent: {msg['agent']}\n\n"
+            for msg in conversation_history[-5:]
+        )
+        return audio_output, response_text, history_text
     except Exception as e:
+        # UI callback boundary: report the failure instead of raising.
+        print(f"Error processing: {e}")
+        return None, f"Error: {e}", ""
 def create_interface():
     """Create the voice agent interface."""
     with gr.Blocks(
+        title="JamboGPT - African Language AI Voice Agent",
         theme=gr.themes.Soft(primary_hue="purple")
     ) as demo:
         gr.Markdown("""
+        # 🌍 JamboGPT - African Language AI Voice Agent
+        **Chat with AI in 10 African languages with voice responses**
         Swahili • Kikuyu • Yoruba • Hausa • Amharic • Fon • Oromo • Somali • Tigrinya • English
         """)
                 placeholder="The agent's response will appear here"
             )
+            audio_output = gr.Audio(
+                label="🔊 Agent Voice",
+                type="filepath",
+                interactive=False
+            )
             # Conversation history
             history_display = gr.Textbox(
                 label="📝 Conversation History",
             )
         # Connect process button
         process_btn.click(
+            fn=process_text_input,
             inputs=[text_input, language_choice],
+            outputs=[audio_output, agent_response, history_display]
         )
         # Examples
                 ["Hello, how are you?", "English"],
             ],
             inputs=[text_input, language_choice],
+            outputs=[audio_output, agent_response],
+            fn=process_text_input,
             cache_examples=False,
         )
         ---
         **JamboGPT** - Making AI Accessible to African Languages
+        🔗 [GitHub](https://github.com/stano03/jambogpt) | 📊 [Dataset](https://huggingface.co/datasets/stano03/jambogpt-real-dataset) | 🤖 [Model](https://huggingface.co/stano03/jambogpt-swahili-tts-v1)
         """)
     return demo
 if __name__ == "__main__":
+    print("🚀 Creating JamboGPT Voice Agent Interface...")
     demo = create_interface()
     print("=" * 50)
+    print("✅ JamboGPT Voice Agent is ready!")
     print("=" * 50)
     demo.launch(