JamboGPT Bot committed on
Commit
2e235f6
·
1 Parent(s): eab1e34

🤖 Feature: AI Voice Agent for Kiswahili and Kikuyu with speech recognition and synthesis

Browse files
Files changed (1) hide show
  1. app.py +255 -129
app.py CHANGED
@@ -1,105 +1,72 @@
1
  #!/usr/bin/env python3
2
  """
3
- JamboGPT - African Language AI
 
4
  Inspired by Yarn GPT's clean, professional design.
5
- A Gradio-based application for Text-to-Speech in African languages.
6
  """
7
 
8
  import os
9
  import gradio as gr
10
  import torch
11
- from transformers import pipeline
 
12
  import numpy as np
13
  from scipy.io import wavfile
14
  import tempfile
15
- import json
16
  from datetime import datetime
 
17
 
18
  # Set device
19
  device = "cuda" if torch.cuda.is_available() else "cpu"
20
- print(f"🌍 Starting JamboGPT - African Language AI")
21
  print(f"Using device: {device}")
22
  print("=" * 50)
23
 
24
  # Language configurations
25
  LANGUAGES = {
26
- "Swahili": {
27
  "code": "swh",
28
  "tts_model": "facebook/mms-tts-swh",
29
  "emoji": "πŸ‡°πŸ‡ͺ",
30
  "speakers": "100M+",
31
- "region": "East Africa"
 
 
 
 
 
 
 
 
 
 
 
 
32
  },
33
  "Kikuyu": {
34
  "code": "ki",
35
  "tts_model": "BrianMwangi/African-Kikuyu-TTS",
36
  "emoji": "πŸ‡°πŸ‡ͺ",
37
  "speakers": "7M",
38
- "region": "Kenya"
39
- },
40
- "Yoruba": {
41
- "code": "yor",
42
- "tts_model": "facebook/mms-tts-yor",
43
- "emoji": "πŸ‡³πŸ‡¬",
44
- "speakers": "45M",
45
- "region": "West Africa"
46
- },
47
- "Hausa": {
48
- "code": "hau",
49
- "tts_model": "facebook/mms-tts-hau",
50
- "emoji": "πŸ‡³πŸ‡¬",
51
- "speakers": "90M",
52
- "region": "West Africa"
53
- },
54
- "Amharic": {
55
- "code": "amh",
56
- "tts_model": "facebook/mms-tts-amh",
57
- "emoji": "πŸ‡ͺπŸ‡Ή",
58
- "speakers": "32M",
59
- "region": "Ethiopia"
60
- },
61
- "Fon": {
62
- "code": "fon",
63
- "tts_model": "facebook/mms-tts-fon",
64
- "emoji": "πŸ‡§πŸ‡―",
65
- "speakers": "2M",
66
- "region": "Benin, Togo"
67
- },
68
- "Oromo": {
69
- "code": "orm",
70
- "tts_model": "facebook/mms-tts-orm",
71
- "emoji": "πŸ‡ͺπŸ‡Ή",
72
- "speakers": "40M",
73
- "region": "Ethiopia, Kenya"
74
- },
75
- "Somali": {
76
- "code": "som",
77
- "tts_model": "facebook/mms-tts-som",
78
- "emoji": "πŸ‡ΈπŸ‡΄",
79
- "speakers": "20M",
80
- "region": "East Africa"
81
- },
82
- "Tigrinya": {
83
- "code": "tir",
84
- "tts_model": "facebook/mms-tts-tir",
85
- "emoji": "πŸ‡ͺπŸ‡·",
86
- "speakers": "7M",
87
- "region": "Horn of Africa"
88
- },
89
- "English": {
90
- "code": "eng",
91
- "tts_model": "facebook/mms-tts-eng",
92
- "emoji": "🌍",
93
- "speakers": "1.5B",
94
- "region": "Global"
95
- },
96
  }
97
 
98
  # Cache for loaded models
99
  model_cache = {}
100
-
101
- # History storage
102
- history = []
103
 
104
  # CSS inspired by Yarn GPT
105
  CUSTOM_CSS = """
@@ -271,7 +238,7 @@ body {
271
 
272
  textarea {
273
  width: 100% !important;
274
- min-height: 120px !important;
275
  padding: 16px !important;
276
  border: 1px solid #d0d0d0 !important;
277
  border-radius: 6px !important;
@@ -295,6 +262,7 @@ textarea:focus {
295
  display: flex;
296
  gap: 12px;
297
  margin-top: 16px;
 
298
  }
299
 
300
  .generate-btn {
@@ -318,6 +286,22 @@ textarea:focus {
318
  transform: scale(0.98) !important;
319
  }
320
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
321
  .output-section {
322
  background: #f8f9fa;
323
  border-radius: 8px;
@@ -352,6 +336,12 @@ textarea:focus {
352
  border: 1px solid #f5c6cb;
353
  }
354
 
 
 
 
 
 
 
355
  .audio-player {
356
  width: 100%;
357
  }
@@ -363,6 +353,51 @@ textarea:focus {
363
  padding: 8px 0;
364
  }
365
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
366
  @media (max-width: 768px) {
367
  .main-container {
368
  grid-template-columns: 1fr;
@@ -394,7 +429,6 @@ def load_tts_model(language_name):
394
  lang_config = LANGUAGES[language_name]
395
  model_id = lang_config["tts_model"]
396
 
397
- # Check cache
398
  if model_id in model_cache:
399
  return model_cache[model_id]
400
 
@@ -411,13 +445,75 @@ def load_tts_model(language_name):
411
  print(f"Error loading model {model_id}: {e}")
412
  return None
413
 
414
- def generate_speech(text, language):
415
- """Generate speech from text in the specified language."""
416
- if not text or not text.strip():
417
- return None, "❌ Please enter some text to generate speech."
418
 
419
- if len(text) > 1000:
420
- return None, "❌ Text is too long. Maximum 1000 characters allowed."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
421
 
422
  try:
423
  synthesizer = load_tts_model(language)
@@ -425,58 +521,73 @@ def generate_speech(text, language):
425
  return None, f"❌ Failed to load TTS model for {language}."
426
 
427
  print(f"Generating speech for: {text[:50]}...")
428
-
429
- # Generate speech
430
  speech = synthesizer(text)
431
 
432
- # Extract audio
433
  audio_array = np.array(speech["audio"]).flatten()
434
  sample_rate = speech["sampling_rate"]
435
 
436
- # Save to temporary file
437
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
438
  wavfile.write(f.name, sample_rate, (audio_array * 32767).astype(np.int16))
439
  temp_path = f.name
440
 
441
- # Add to history
442
- history.append({
443
- "text": text[:50] + "..." if len(text) > 50 else text,
444
- "language": language,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
445
  "timestamp": datetime.now().isoformat()
446
  })
447
 
448
- return temp_path, f"βœ… Speech generated successfully in {language}!"
449
-
450
  except Exception as e:
451
- print(f"Error generating speech: {e}")
452
- return None, f"❌ Error generating speech: {str(e)}"
453
 
454
  def create_interface():
455
- """Create the Yarn GPT-inspired interface."""
456
 
457
  with gr.Blocks(
458
- title="JamboGPT - African Language AI",
459
  css=CUSTOM_CSS
460
  ) as demo:
461
 
462
- # Main container with sidebar
463
  with gr.Row(equal_height=True):
464
  # Sidebar
465
  with gr.Column(scale=0, min_width=350):
466
  gr.Markdown(
467
  """
468
  <div class="sidebar">
469
- <div class="sidebar-title">Recent Generations</div>
470
  </div>
471
  """
472
  )
473
 
474
- # History display
475
  history_display = gr.Markdown(
476
  """
477
  <div class="sidebar">
478
  <div style="text-align: center; color: #999; padding: 20px; font-size: 13px;">
479
- No recent generations yet
480
  </div>
481
  </div>
482
  """
@@ -489,25 +600,22 @@ def create_interface():
489
  """
490
  <div class="header">
491
  <div class="logo">🌍 JamboGPT</div>
492
- <div class="headline">African Language AI: The No 1 Multilingual Text-to-Speech Engine</div>
493
- <div class="subheadline">Generate natural-sounding speech in 9 African languages. Integrate with a simple API or use our web interface.</div>
494
  </div>
495
  """
496
  )
497
 
498
  # Tabs
499
- with gr.Row():
500
- with gr.Column(scale=1):
501
- gr.Markdown(
502
- """
503
- <div class="tabs-container">
504
- <button class="tab-button active">🎀 Text Input</button>
505
- <button class="tab-button">πŸ“š Document</button>
506
- <button class="tab-button">πŸ’¬ Conversation</button>
507
- <button class="tab-button">πŸ“Š Batch</button>
508
- </div>
509
- """
510
- )
511
 
512
  # Input section
513
  with gr.Group():
@@ -516,14 +624,14 @@ def create_interface():
516
  # Language selector
517
  language_choice = gr.Dropdown(
518
  choices=list(LANGUAGES.keys()),
519
- value="Swahili",
520
  label="Select Language",
521
  interactive=True
522
  )
523
 
524
  # Language info
525
  language_info = gr.Markdown(
526
- f"πŸ‡°πŸ‡ͺ **Swahili** β€’ 100M+ speakers β€’ East Africa"
527
  )
528
 
529
  def update_language_info(language):
@@ -534,17 +642,18 @@ def create_interface():
534
 
535
  language_choice.change(update_language_info, inputs=language_choice, outputs=language_info)
536
 
537
- # Text input
538
- text_input = gr.Textbox(
539
- label="Enter your text here to generate a single audio file.",
540
- placeholder="Type your text in the selected language...",
541
- lines=4,
 
542
  interactive=True
543
  )
544
 
545
- # Generate button
546
- generate_btn = gr.Button(
547
- "🎡 Generate Speech",
548
  variant="primary",
549
  size="lg"
550
  )
@@ -555,18 +664,35 @@ def create_interface():
555
  with gr.Group():
556
  gr.Markdown('<div class="output-section">')
557
 
558
- gr.Markdown('<div class="output-label">Generated Audio</div>')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
559
 
 
 
560
  audio_output = gr.Audio(
561
  label="",
562
  type="filepath",
563
  interactive=False
564
  )
565
 
 
566
  status_message = gr.Textbox(
567
  label="Status",
568
  interactive=False,
569
- value="Ready to generate speech!"
570
  )
571
 
572
  gr.Markdown('</div>')
@@ -575,27 +701,27 @@ def create_interface():
575
  gr.Markdown(
576
  """
577
  <div style="margin-top: 40px; padding-top: 20px; border-top: 1px solid #e0e0e0; font-size: 13px; color: #999;">
578
- <p>🌍 <strong>JamboGPT</strong> - Making AI accessible in African languages</p>
579
- <p>Powered by Meta's Massively Multilingual Speech (MMS) β€’ <a href="https://huggingface.co/spaces/stano03/jambogpt" style="color: #666;">View on Hugging Face</a></p>
580
  </div>
581
  """
582
  )
583
 
584
- # Connect generate button
585
- generate_btn.click(
586
- fn=generate_speech,
587
- inputs=[text_input, language_choice],
588
- outputs=[audio_output, status_message]
589
  )
590
 
591
  return demo
592
 
593
  if __name__ == "__main__":
594
- print("πŸš€ Creating JamboGPT Interface...")
595
  demo = create_interface()
596
 
597
  print("=" * 50)
598
- print("βœ… JamboGPT is ready!")
599
  print("=" * 50)
600
 
601
  demo.launch(
 
1
  #!/usr/bin/env python3
2
  """
3
+ JamboGPT - African Language AI Voice Agent
4
+ Specialized for Kiswahili and Kikuyu with voice input/output.
5
  Inspired by Yarn GPT's clean, professional design.
 
6
  """
7
 
8
  import os
9
  import gradio as gr
10
  import torch
11
+ import torchaudio
12
+ from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
13
  import numpy as np
14
  from scipy.io import wavfile
15
  import tempfile
 
16
  from datetime import datetime
17
+ import json
18
 
19
  # Set device
20
  device = "cuda" if torch.cuda.is_available() else "cpu"
21
+ print(f"🌍 Starting JamboGPT - African Language AI Voice Agent")
22
  print(f"Using device: {device}")
23
  print("=" * 50)
24
 
25
  # Language configurations
26
  LANGUAGES = {
27
+ "Kiswahili": {
28
  "code": "swh",
29
  "tts_model": "facebook/mms-tts-swh",
30
  "emoji": "πŸ‡°πŸ‡ͺ",
31
  "speakers": "100M+",
32
+ "region": "East Africa",
33
+ "greetings": [
34
+ "Habari! Jina lako nani?",
35
+ "Karibu! Unajifunza nini leo?",
36
+ "Habari yako? Niweza kusaidia?",
37
+ "Asante kwa kukamatia! Unajifunza nini?"
38
+ ],
39
+ "responses": {
40
+ "greeting": "Habari! Niko hapa kusaidia. Unajifunza nini leo?",
41
+ "help": "Niweza kusaidia kwa swahili. Tafadhali niambie unajifunza nini.",
42
+ "thanks": "Asante sana! Niko hapa kila wakati.",
43
+ "bye": "Kwaheri! Karibu tena mwingine wakati."
44
+ }
45
  },
46
  "Kikuyu": {
47
  "code": "ki",
48
  "tts_model": "BrianMwangi/African-Kikuyu-TTS",
49
  "emoji": "πŸ‡°πŸ‡ͺ",
50
  "speakers": "7M",
51
+ "region": "Kenya",
52
+ "greetings": [
53
+ "WΔ© mwega! WΔ© Ε©rΔ©a mwega?",
54
+ "Karibu! NΔ©guo mwega!",
55
+ "Mwega! NΔ© Ε©ndΕ© Ε©rΔ©kΕ©?",
56
+ "WΔ© mwega! NΔ©kΔ©o kΔ©ndΕ©?"
57
+ ],
58
+ "responses": {
59
+ "greeting": "WΔ© mwega! NΔ© Ε©ndΕ© Ε©rΔ©kΕ©?",
60
+ "help": "NΔ© mwega! NΔ©kΔ©o kΔ©ndΕ© kΔ©rΔ©a Ε©rΔ© na kΔ©o?",
61
+ "thanks": "Mwega muno! NΔ© mwega.",
62
+ "bye": "RΔ©a rΔ©u! WΔ© mwega!"
63
+ }
64
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  }
66
 
67
  # Cache for loaded models
68
  model_cache = {}
69
+ conversation_history = []
 
 
70
 
71
  # CSS inspired by Yarn GPT
72
  CUSTOM_CSS = """
 
238
 
239
  textarea {
240
  width: 100% !important;
241
+ min-height: 100px !important;
242
  padding: 16px !important;
243
  border: 1px solid #d0d0d0 !important;
244
  border-radius: 6px !important;
 
262
  display: flex;
263
  gap: 12px;
264
  margin-top: 16px;
265
+ flex-wrap: wrap;
266
  }
267
 
268
  .generate-btn {
 
286
  transform: scale(0.98) !important;
287
  }
288
 
289
+ .secondary-btn {
290
+ background: #f0f0f0 !important;
291
+ color: #333 !important;
292
+ border: 1px solid #d0d0d0 !important;
293
+ border-radius: 6px !important;
294
+ padding: 12px 24px !important;
295
+ font-weight: 600 !important;
296
+ font-size: 14px !important;
297
+ cursor: pointer !important;
298
+ transition: all 0.2s ease !important;
299
+ }
300
+
301
+ .secondary-btn:hover {
302
+ background: #e0e0e0 !important;
303
+ }
304
+
305
  .output-section {
306
  background: #f8f9fa;
307
  border-radius: 8px;
 
336
  border: 1px solid #f5c6cb;
337
  }
338
 
339
+ .status-info {
340
+ background: #d1ecf1;
341
+ color: #0c5460;
342
+ border: 1px solid #bee5eb;
343
+ }
344
+
345
  .audio-player {
346
  width: 100%;
347
  }
 
353
  padding: 8px 0;
354
  }
355
 
356
+ .conversation-display {
357
+ background: white;
358
+ border: 1px solid #e0e0e0;
359
+ border-radius: 6px;
360
+ padding: 16px;
361
+ margin-bottom: 16px;
362
+ max-height: 400px;
363
+ overflow-y: auto;
364
+ font-size: 13px;
365
+ }
366
+
367
+ .message {
368
+ margin-bottom: 12px;
369
+ padding: 8px;
370
+ border-radius: 4px;
371
+ }
372
+
373
+ .user-message {
374
+ background: #e3f2fd;
375
+ color: #1565c0;
376
+ margin-left: 20px;
377
+ text-align: right;
378
+ }
379
+
380
+ .agent-message {
381
+ background: #f5f5f5;
382
+ color: #333;
383
+ margin-right: 20px;
384
+ }
385
+
386
+ .recording-indicator {
387
+ display: inline-block;
388
+ width: 12px;
389
+ height: 12px;
390
+ background: #ff4444;
391
+ border-radius: 50%;
392
+ margin-right: 8px;
393
+ animation: pulse 1s infinite;
394
+ }
395
+
396
+ @keyframes pulse {
397
+ 0%, 100% { opacity: 1; }
398
+ 50% { opacity: 0.5; }
399
+ }
400
+
401
  @media (max-width: 768px) {
402
  .main-container {
403
  grid-template-columns: 1fr;
 
429
  lang_config = LANGUAGES[language_name]
430
  model_id = lang_config["tts_model"]
431
 
 
432
  if model_id in model_cache:
433
  return model_cache[model_id]
434
 
 
445
  print(f"Error loading model {model_id}: {e}")
446
  return None
447
 
448
def load_asr_model():
    """Return the shared Whisper speech-recognition pipeline, building it once.

    The pipeline is stored in ``model_cache`` under the key ``"asr"`` so later
    calls reuse it instead of loading the model again.

    Returns:
        The ``automatic-speech-recognition`` pipeline for
        ``openai/whisper-base``, or ``None`` if creating it raised.
    """
    # Fast path: a previous call already built and cached the pipeline.
    try:
        return model_cache["asr"]
    except KeyError:
        pass

    try:
        print("Loading Whisper ASR model...")
        # Hand the pipeline "cuda" when that is the selected device, -1 otherwise.
        target_device = -1 if device != "cuda" else device
        recognizer = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-base",
            device=target_device,
        )
        model_cache["asr"] = recognizer
    except Exception as err:
        # A failed load is reported and signalled with None; nothing is cached,
        # so the next call tries again.
        print(f"Error loading ASR model: {err}")
        return None
    return recognizer
465
+
466
def transcribe_audio(audio_file):
    """Transcribe a recording to text with the shared Whisper ASR pipeline.

    Args:
        audio_file: Input handed to the ASR pipeline. The voice UI passes a
            file path; ``None`` or a blank path means nothing was recorded.

    Returns:
        A ``(text, status)`` tuple. ``text`` is the stripped transcription,
        or ``None`` when there was no audio, the model could not be loaded,
        the transcription came back empty, or an exception was raised.
        ``status`` is a user-facing message describing the outcome.
    """
    # Reject a missing recording up front so the user gets a clear message
    # instead of whatever exception the pipeline raises for a None input.
    nothing_recorded = audio_file is None or (
        isinstance(audio_file, str) and not audio_file.strip()
    )
    if nothing_recorded:
        return None, "❌ No audio received. Please record your voice first."

    try:
        asr = load_asr_model()
        if asr is None:
            return None, "❌ Failed to load ASR model."

        print("Transcribing audio...")
        result = asr(audio_file)
        text = result.get("text", "").strip()

        if not text:
            return None, "❌ Could not transcribe audio. Please try again."

        return text, f"✅ Transcribed: {text}"
    except Exception as e:
        # UI boundary: report the failure as a status message rather than
        # letting the exception escape into the button callback.
        print(f"Error transcribing: {e}")
        return None, f"❌ Error transcribing: {str(e)}"
484
+
485
def generate_response(user_text, language):
    """Pick a canned reply for ``user_text`` using simple keyword intents.

    Intents are checked in a fixed order (greeting, thanks, bye) and the
    first one whose keyword appears as a whole word in the input wins. The
    reply text comes from the ``"responses"`` table of the selected language
    in ``LANGUAGES``, with a built-in fallback when the table has no entry.

    Args:
        user_text: What the user said (typically the ASR transcription).
        language: Key into ``LANGUAGES``; ``"Kiswahili"`` selects the Swahili
            default reply, any other value gets the Kikuyu default.

    Returns:
        A ``(response, status)`` tuple; ``response`` is ``None`` when the
        input is empty or an unexpected error occurred.
    """
    if not user_text or not user_text.strip():
        return None, "❌ No text to respond to."

    try:
        # Match whole words, not substrings: a substring test treats "hi" as
        # present in "this"/"hii"/"hivyo" and "how" in "show", which sent
        # almost every Swahili sentence down the greeting branch.
        punctuation = ".,!?;:\"'()[]{}"
        words = {token.strip(punctuation) for token in user_text.lower().split()}

        lang_config = LANGUAGES.get(language, {})
        responses = lang_config.get("responses", {})

        # (intent key, trigger words, fallback reply); order is precedence.
        intents = (
            ("greeting", {"habari", "wĩ", "how", "hello", "hi"}, "Habari!"),
            ("thanks", {"asante", "asanteni", "thank", "thanks", "mwega"}, "Asante!"),
            ("bye", {"bye", "goodbye", "kwaheri", "rĩa"}, "Kwaheri!"),
        )
        for intent, keywords, fallback in intents:
            if words & keywords:
                return responses.get(intent, fallback), "✅ Response generated!"

        # No intent recognised: fall back to a per-language default reply.
        if language == "Kiswahili":
            response = f"Ninataka kusikia zaidi kuhusu: {user_text}. Unaweza kuandika zaidi?"
        else:  # Kikuyu
            response = "Nĩ mwega! Wĩ ũrĩa mwega? Nĩkĩo kĩndũ?"

        return response, "✅ Response generated!"
    except Exception as e:
        # UI boundary: surface the problem as a status message.
        print(f"Error generating response: {e}")
        return None, f"❌ Error: {str(e)}"
512
+
513
+ def synthesize_speech(text, language):
514
+ """Convert text to speech."""
515
+ if not text or not text.strip():
516
+ return None, "❌ No text to synthesize."
517
 
518
  try:
519
  synthesizer = load_tts_model(language)
 
521
  return None, f"❌ Failed to load TTS model for {language}."
522
 
523
  print(f"Generating speech for: {text[:50]}...")
 
 
524
  speech = synthesizer(text)
525
 
 
526
  audio_array = np.array(speech["audio"]).flatten()
527
  sample_rate = speech["sampling_rate"]
528
 
 
529
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
530
  wavfile.write(f.name, sample_rate, (audio_array * 32767).astype(np.int16))
531
  temp_path = f.name
532
 
533
+ return temp_path, "βœ… Speech generated!"
534
+ except Exception as e:
535
+ print(f"Error synthesizing: {e}")
536
+ return None, f"❌ Error: {str(e)}"
537
+
538
def process_voice_input(audio_input, language):
    """Run one voice turn: transcribe -> generate response -> synthesize.

    Args:
        audio_input: The user's recording as passed on to ``transcribe_audio``;
            ``None`` or a blank path means nothing was recorded.
        language: Language name forwarded to ``generate_response`` and
            ``synthesize_speech``.

    Returns:
        A 4-tuple ``(audio_path, response_text, status, user_text)``, in the
        same order as the outputs wired to the process button (agent audio,
        agent response, status, transcription). Failed steps yield ``None``
        for the audio/response slots and an explanatory ``status``.
    """
    # Nothing recorded: say so plainly instead of reporting a pipeline error.
    if audio_input is None or (
        isinstance(audio_input, str) and not audio_input.strip()
    ):
        return None, None, "❌ No audio received. Please record your voice first.", ""

    try:
        # Step 1: Transcribe
        user_text, transcribe_status = transcribe_audio(audio_input)
        if user_text is None:
            return None, None, transcribe_status, ""

        # Step 2: Generate response
        response_text, response_status = generate_response(user_text, language)
        if response_text is None:
            # The transcription succeeded, so keep it visible to the user
            # even though no reply could be produced.
            return None, None, response_status, user_text

        # Step 3: Synthesize response (audio_output is None if this fails;
        # the text reply is still returned alongside the failure status).
        audio_output, synth_status = synthesize_speech(response_text, language)

        # Add to conversation history.
        # NOTE(review): conversation_history is a module-level list, so it is
        # shared by every caller in the process and grows without bound.
        conversation_history.append({
            "user": user_text,
            "agent": response_text,
            "timestamp": datetime.now().isoformat()
        })

        return audio_output, response_text, synth_status, user_text

    except Exception as e:
        # UI boundary: never let an exception escape into the button callback.
        print(f"Error processing voice: {e}")
        return None, None, f"❌ Error: {str(e)}", ""
565
 
566
  def create_interface():
567
+ """Create the voice agent interface."""
568
 
569
  with gr.Blocks(
570
+ title="JamboGPT - African Language AI Voice Agent",
571
  css=CUSTOM_CSS
572
  ) as demo:
573
 
574
+ # Main container
575
  with gr.Row(equal_height=True):
576
  # Sidebar
577
  with gr.Column(scale=0, min_width=350):
578
  gr.Markdown(
579
  """
580
  <div class="sidebar">
581
+ <div class="sidebar-title">πŸ—£οΈ Conversation History</div>
582
  </div>
583
  """
584
  )
585
 
 
586
  history_display = gr.Markdown(
587
  """
588
  <div class="sidebar">
589
  <div style="text-align: center; color: #999; padding: 20px; font-size: 13px;">
590
+ No conversations yet
591
  </div>
592
  </div>
593
  """
 
600
  """
601
  <div class="header">
602
  <div class="logo">🌍 JamboGPT</div>
603
+ <div class="headline">African Language AI Voice Agent</div>
604
+ <div class="subheadline">Speak in Kiswahili or Kikuyu and have a natural conversation with AI. Your voice is understood, processed, and responded to in your language.</div>
605
  </div>
606
  """
607
  )
608
 
609
  # Tabs
610
+ gr.Markdown(
611
+ """
612
+ <div class="tabs-container">
613
+ <button class="tab-button active">πŸŽ™οΈ Voice Agent</button>
614
+ <button class="tab-button">πŸ“ Text Mode</button>
615
+ <button class="tab-button">βš™οΈ Settings</button>
616
+ </div>
617
+ """
618
+ )
 
 
 
619
 
620
  # Input section
621
  with gr.Group():
 
624
  # Language selector
625
  language_choice = gr.Dropdown(
626
  choices=list(LANGUAGES.keys()),
627
+ value="Kiswahili",
628
  label="Select Language",
629
  interactive=True
630
  )
631
 
632
  # Language info
633
  language_info = gr.Markdown(
634
+ f"πŸ‡°πŸ‡ͺ **Kiswahili** β€’ 100M+ speakers β€’ East Africa"
635
  )
636
 
637
  def update_language_info(language):
 
642
 
643
  language_choice.change(update_language_info, inputs=language_choice, outputs=language_info)
644
 
645
+ # Voice input
646
+ gr.Markdown("**🎀 Speak in your language:**")
647
+ audio_input = gr.Audio(
648
+ label="Record your voice",
649
+ type="filepath",
650
+ sources=["microphone"],
651
  interactive=True
652
  )
653
 
654
+ # Process button
655
+ process_btn = gr.Button(
656
+ "πŸŽ™οΈ Process Voice",
657
  variant="primary",
658
  size="lg"
659
  )
 
664
  with gr.Group():
665
  gr.Markdown('<div class="output-section">')
666
 
667
+ # Transcription
668
+ gr.Markdown('<div class="output-label">πŸ“ What You Said</div>')
669
+ transcription = gr.Textbox(
670
+ label="",
671
+ interactive=False,
672
+ placeholder="Your transcribed text will appear here"
673
+ )
674
+
675
+ # Agent response
676
+ gr.Markdown('<div class="output-label">πŸ€– Agent Response</div>')
677
+ agent_response = gr.Textbox(
678
+ label="",
679
+ interactive=False,
680
+ placeholder="The agent's response will appear here"
681
+ )
682
 
683
+ # Audio output
684
+ gr.Markdown('<div class="output-label">πŸ”Š Agent Voice</div>')
685
  audio_output = gr.Audio(
686
  label="",
687
  type="filepath",
688
  interactive=False
689
  )
690
 
691
+ # Status
692
  status_message = gr.Textbox(
693
  label="Status",
694
  interactive=False,
695
+ value="Ready to listen!"
696
  )
697
 
698
  gr.Markdown('</div>')
 
701
  gr.Markdown(
702
  """
703
  <div style="margin-top: 40px; padding-top: 20px; border-top: 1px solid #e0e0e0; font-size: 13px; color: #999;">
704
+ <p>🌍 <strong>JamboGPT</strong> - African Language AI Voice Agent</p>
705
+ <p>Speak naturally in Kiswahili or Kikuyu β€’ Powered by Whisper + Hugging Face β€’ <a href="https://huggingface.co/spaces/stano03/jambogpt" style="color: #666;">View on Hugging Face</a></p>
706
  </div>
707
  """
708
  )
709
 
710
+ # Connect process button
711
+ process_btn.click(
712
+ fn=process_voice_input,
713
+ inputs=[audio_input, language_choice],
714
+ outputs=[audio_output, agent_response, status_message, transcription]
715
  )
716
 
717
  return demo
718
 
719
  if __name__ == "__main__":
720
+ print("πŸš€ Creating JamboGPT Voice Agent Interface...")
721
  demo = create_interface()
722
 
723
  print("=" * 50)
724
+ print("βœ… JamboGPT Voice Agent is ready!")
725
  print("=" * 50)
726
 
727
  demo.launch(