Spaces:

Namanrai
/

Glowmation-TTS-API

Running

App Files Files Community

Namanrai committed 7 days ago

Commit

b4935c5

verified ·

1 Parent(s): 8f12b2f

Update app.py

Browse files

Files changed (1) hide show

app.py +165 -35

app.py CHANGED Viewed

@@ -1,47 +1,177 @@
-import gradio as gr
-from transformers import pipeline
-import soundfile as sf
-import torch
-from datasets import load_dataset
 import gc
-# System ki purani memory saaf karna
 gc.collect()
-print("⏳ WARNING: Loading the BIGGEST Engine... Server crash hone ke chances hain!")
 try:
-    # Heavy Text-to-Speech pipeline load kar rahe hain
-    synthesizer = pipeline("text-to-speech", "microsoft/speecht5_tts")
-    # High-quality speaker embedding
-    embeddings_dataset = load_dataset("Matthijs/cmu_arctic_xvectors", split="validation")
-    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
-    print("✅ Engine load ho gaya! (Ye ek miracle hai)")
 except Exception as e:
-    print(f"❌ Server Crashed: {e}")
-def generate_api_voice(text):
-    if not text:
-        return None, "Error: Script likhna zaruri hai!"
     try:
-        # Aawaz generate karne ka heavy process
-        speech = synthesizer(text, forward_params={"speaker_embeddings": speaker_embedding})
-        output_file = "output_voice.wav"
-        sf.write(output_file, speech["audio"], samplerate=speech["sampling_rate"])
-        return output_file, "✅ API Status: Success"
     except Exception as e:
-        return None, f"⚠️ Engine Error: {str(e)}"
-# Ekdum clean UI (No extra clutter)
-iface = gr.Interface(
-    fn=generate_api_voice,
-    inputs=[gr.Textbox(label="Apni Script Yahan Likho")],
-    outputs=[gr.Audio(label="VoiceForge Output"), gr.Textbox(label="Status")],
-    title="🎙️ VoiceForge AI Studio - Heavy Engine Test"
-)
-iface.launch()

+import os
 import gc
+import torch
+import gradio as gr
+import numpy as np
+import tempfile
 gc.collect()
+# Also release cached CUDA memory when a GPU is present.
+if torch.cuda.is_available():
+    torch.cuda.empty_cache()
+print("⏳ VibeVoice TTS - Loading Engine...")
+# ===== MODEL LOAD =====
+# Stays None when loading fails; generate_voice() checks this before synthesizing.
+tts_model = None
 try:
+    # Imported inside the try so a failure to import TTS is reported by the
+    # except branch below instead of aborting the script.
+    from TTS.api import TTS
+    # XTTS v2 - Real voice cloning model
+    # Also works on CPU (slow but works)
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    print(f"🖥️ Device: {device}")
+    tts_model = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+    print("✅ XTTS v2 Engine Loaded!")
 except Exception as e:
+    # Keep the app running without a model; the failure is only logged here.
+    print(f"❌ Model Load Failed: {e}")
+    tts_model = None
+# ===== MAIN FUNCTION =====
+def generate_voice(text, reference_audio, language):
+    """Synthesize ``text`` in the voice heard in ``reference_audio``.
+
+    Args:
+        text: Text to speak. Surrounding whitespace is ignored and at most
+            500 characters are accepted.
+        reference_audio: Path of the speaker's voice sample (the UI asks for
+            3-10 seconds of clean WAV/MP3 audio).
+        language: Language code handed to the model, e.g. "en" or "hi".
+
+    Returns:
+        A ``(output_path, status_message)`` tuple. ``output_path`` is the
+        path of the generated WAV file, or ``None`` when validation or
+        synthesis fails; ``status_message`` is the user-facing status text.
+    """
+    # --- Basic Validation ---
+    if tts_model is None:
+        return None, "❌ Model load nahi hua. Server RAM/GPU issue hai. Space restart karo."
+    # Normalise once so the length check and the synthesized text agree.
+    text = text.strip() if text else ""
+    if not text:
+        return None, "⚠️ Text khali hai! Kuch likho pehle."
+    if reference_audio is None:
+        return None, "⚠️ Voice sample upload karo (3-10 second ka clean audio)."
+    if len(text) > 500:
+        return None, "⚠️ Text zyada lamba hai. 500 characters tak raho abhi."
+    output_path = None
     try:
+        # Create the output file up front: tempfile.mktemp() only returns a
+        # name, is deprecated, and can race with another process that
+        # creates the same path first.
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            output_path = tmp.name
+        print(f"🎙️ Generating: '{text[:50]}...' | Lang: {language}")
+        tts_model.tts_to_file(
+            text=text,
+            speaker_wav=reference_audio,   # the voice cloning happens here
+            language=language,
+            file_path=output_path,
+        )
+        print("✅ Audio Generated!")
+        return output_path, "✅ Awaaz ban gayi! Neeche play karo ya download karo."
     except Exception as e:
+        err = str(e)
+        print(f"❌ Generation Error: {err}")
+        # Best-effort cleanup so failed requests do not pile up temp files;
+        # a failure to delete must not mask the original error.
+        if output_path is not None:
+            try:
+                os.remove(output_path)
+            except OSError:
+                pass
+        # Map common failures to messages the user can act on.
+        lowered = err.lower()
+        if "cuda out of memory" in lowered:
+            return None, "❌ GPU RAM full ho gayi. Chhota text try karo ya CPU Space use karo."
+        elif "ffmpeg" in lowered:
+            return None, "❌ Audio format issue. WAV ya MP3 file upload karo."
+        elif "sample rate" in lowered:
+            return None, "❌ Audio quality low hai. 22050Hz ya upar ka audio use karo."
+        else:
+            return None, f"❌ Error aaya: {err}"
+# ===== GRADIO UI =====
+LANGUAGES = {
+    # UI label -> language code passed to the model.
+    # Urdu ("ur") is intentionally absent: it is not in XTTS v2's published
+    # list of supported languages, so selecting it could only produce an error.
+    "English": "en",
+    "Hindi": "hi",
+    "French": "fr",
+    "Spanish": "es",
+    "German": "de",
+    "Italian": "it",
+    "Portuguese": "pt",
+    "Polish": "pl",
+    "Turkish": "tr",
+    "Russian": "ru",
+    "Dutch": "nl",
+    "Czech": "cs",
+    "Arabic": "ar",
+    "Chinese": "zh-cn",
+    "Japanese": "ja",
+    "Korean": "ko",
+    "Hungarian": "hu",
+}
+with gr.Blocks(
+    # Page-level settings: title, colour theme, and custom CSS.
+    title="🎙️ VibeVoice TTS",
+    theme=gr.themes.Soft(primary_hue="violet", secondary_hue="purple"),
+    css="""
+    .gradio-container { max-width: 800px !important; margin: auto; }
+    h1 { text-align: center; color: #7c3aed; }
+    .status-box textarea { font-size: 14px !important; }
+    """
+) as iface:
+    # Header banner.
+    gr.HTML("""
+    <h1>🎙️ VibeVoice TTS</h1>
+    <p style='text-align:center; color:#6b7280;'>
+        Apni awaaz upload karo → Text likho → AI teri awaaz mein bolega
+    </p>
+    """)
+    # Inputs: one row with two equally weighted columns.
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### 📝 Step 1 – Text")
+            text_input = gr.Textbox(
+                label="Jo bolwana hai woh yahan likho",
+                placeholder="Hello! Yeh meri awaaz hai, AI ne clone ki hai.",
+                lines=4,
+                max_lines=8,
+            )
+            # Choices are the display names; they are mapped to language
+            # codes through LANGUAGES in the click handler below.
+            lang_dropdown = gr.Dropdown(
+                choices=list(LANGUAGES.keys()),
+                value="Hindi",
+                label="🌐 Language",
+            )
+        with gr.Column(scale=1):
+            gr.Markdown("### 🎤 Step 2 – Apni Awaaz Upload Karo")
+            # type="filepath": the handler receives the sample as a file path.
+            audio_input = gr.Audio(
+                sources=["upload", "microphone"],
+                type="filepath",
+                label="Voice Sample (3–10 seconds, saaf awaaz mein)",
+            )
+            gr.Markdown(
+                "<small>💡 Tips: Quiet room mein record karo. WAV ya MP3 dono chalega.</small>"
+            )
+    submit_btn = gr.Button("🚀 Generate Voice", variant="primary", size="lg")
+    gr.Markdown("---")
+    gr.Markdown("### 🔊 Result")
+    # Outputs: the generated audio and a read-only status line.
+    audio_output = gr.Audio(label="Generated Voice", type="filepath")
+    status_output = gr.Textbox(
+        label="Status",
+        interactive=False,
+        elem_classes=["status-box"],
+    )
+    # Button click → function call
+    # The lambda converts the selected display name into its language code
+    # before delegating to generate_voice().
+    submit_btn.click(
+        fn=lambda text, audio, lang: generate_voice(text, audio, LANGUAGES[lang]),
+        inputs=[text_input, audio_input, lang_dropdown],
+        outputs=[audio_output, status_output],
+    )
+    # Footer credits.
+    gr.Markdown("""
+    ---
+    <p style='text-align:center; font-size:12px; color:#9ca3af;'>
+    Powered by <b>Coqui XTTS v2</b> · Real Voice Cloning · 17 Languages
+    </p>
+    """)
+if __name__ == "__main__":
+    # When run as a script, serve the UI on all network interfaces, port 7860.
+    iface.launch(server_name="0.0.0.0", server_port=7860)