Spaces:

Namanrai
/

Glowmation-TTS-API

Running

App Files Files Community

Namanrai committed 8 days ago

Commit

3ec8508

verified ·

1 Parent(s): 5ba20b4

Update app.py

Browse files

Files changed (1) hide show

app.py +58 -130

app.py CHANGED Viewed

@@ -1,137 +1,65 @@
 import os
-import gc
-import torch
 import gradio as gr
 import soundfile as sf
-import tempfile
 gc.collect()
-if torch.cuda.is_available():
-    torch.cuda.empty_cache()
-print("⏳ Qwen3-TTS loading...")
-model = None
-def load_model():
-    global model
-    if model is not None:
-        return True
-    try:
-        from qwen_tts import Qwen3TTSModel
-        model = Qwen3TTSModel.from_pretrained(
-            "Qwen/Qwen3-TTS-12Hz-0.6B-Base",
-            device_map="cuda" if torch.cuda.is_available() else "cpu",
-            dtype=torch.bfloat16,
-        )
-        print("✅ Qwen3-TTS Loaded!")
-        return True
-    except Exception as e:
-        print(f"❌ Load Error: {e}")
-        return False
-def generate_voice(text, reference_audio, ref_transcript, language):
-    if not text or text.strip() == "":
-        return None, "⚠️ Text khali hai! Kuch likho."
-    if reference_audio is None:
-        return None, "⚠️ Apni awaaz ka audio upload karo (3-10 sec)."
-    if not load_model():
-        return None, "❌ Model load nahi hua. GPU Space use kar raha hai? T4 GPU select karo."
     try:
-        output_path = tempfile.mktemp(suffix=".wav")
-        # ref_transcript optional hai — agar nahi diya toh None pass karo
-        transcript = ref_transcript.strip() if ref_transcript and ref_transcript.strip() else None
-        wavs, sr = model.generate_voice_clone(
-            text=text,
-            language=language,
-            ref_audio=reference_audio,
-            ref_text=transcript,  # None hoga toh model khud guess karega
-        )
-        sf.write(output_path, wavs[0], sr)
-        return output_path, "✅ Awaaz ban gayi! Neeche play/download karo."
-    except torch.cuda.OutOfMemoryError:
-        return None, "❌ GPU RAM full! Chhota text try karo (100 words tak)."
     except Exception as e:
-        err = str(e)
-        print(f"❌ Error: {err}")
-        if "ffmpeg" in err.lower() or "audio" in err.lower():
-            return None, "❌ Audio format issue. WAV file upload karo."
-        return None, f"❌ Error: {err}"
-# ── UI ──────────────────────────────────────────────────────────────────────
-LANGUAGES = [
-    "English", "Chinese", "Japanese", "Korean",
-    "German", "French", "Russian", "Portuguese",
-    "Spanish", "Italian"
-]
-with gr.Blocks(title="🎙️ VibeVoice – Qwen3 TTS") as iface:
-    gr.HTML("""
-    <h1 style='text-align:center; color:#7c3aed;'>🎙️ VibeVoice – Qwen3 TTS</h1>
-    <p style='text-align:center; color:#6b7280;'>
-        Apni awaaz upload karo → Text likho → AI teri awaaz mein bolega<br>
-        <small>Powered by Qwen3-TTS-0.6B · Real Voice Cloning</small>
-    </p>
-    """)
-    with gr.Row():
-        with gr.Column():
-            gr.Markdown("### 📝 Step 1 – Text")
-            text_input = gr.Textbox(
-                label="Jo bolwana hai",
-                placeholder="Namaste! Yeh meri awaaz hai jo AI ne clone ki hai.",
-                lines=4,
-            )
-            lang_dropdown = gr.Dropdown(
-                choices=LANGUAGES,
-                value="English",
-                label="🌐 Language",
-            )
-        with gr.Column():
-            gr.Markdown("### 🎤 Step 2 – Voice Sample")
-            audio_input = gr.Audio(
-                source="upload",
-                type="filepath",
-                label="Apni awaaz upload karo (3–10 sec, saaf audio)",
-            )
-            ref_text_input = gr.Textbox(
-                label="Reference Audio ka text (optional, lekin doge toh quality better hogi)",
-                placeholder="Jo tumne us audio mein bola tha...",
-                lines=2,
-            )
-    submit_btn = gr.Button("🚀 Generate Voice", variant="primary")
-    gr.Markdown("### 🔊 Result")
-    audio_output = gr.Audio(label="Generated Voice")
-    status_output = gr.Textbox(label="Status", interactive=False)
-    submit_btn.click(
-        fn=generate_voice,
-        inputs=[text_input, audio_input, ref_text_input, lang_dropdown],
-        outputs=[audio_output, status_output],
-    )
-    gr.Markdown("""
-    ---
-    💡 **Tips:**
-    - GPU Space use karo (T4 free tier chalega)
-    - Reference audio: 5-10 second, quiet room, WAV format best hai
-    - Ref text doge toh cloning zyada accurate hogi
-    - Pehli baar thoda slow hoga (model download), baad mein fast
-    """)
-if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860)

 import os
 import gradio as gr
+from TTS.api import TTS
+import noisereduce as nr
 import soundfile as sf
+import gc
+# Tune memory and thread usage for running on CPU
 gc.collect()
+os.environ['COQUI_TOS_AGREED'] = '1'
+os.environ['OMP_NUM_THREADS'] = '4'
+print("⏳ Loading Final Boss: NeuTTS Air Q4...")
+tts = None
+current_engine = "None"
+# 🔄 THE AUTO-SWITCH SYSTEM
+try:
+    # First try the primary engine: XTTS v2 (reported to users as "NeuTTS")
+    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
+    current_engine = "NeuTTS (Pro Quality)"
+    print("✅ NeuTTS Successfully Loaded!")
+except Exception as e:
+    # If loading fails for any reason (e.g. not enough RAM), quietly fall back to the backup engine
+    print(f"⚠️ NeuTTS Load Failed. Auto-Switching to Backup Engine... Error: {e}")
+    tts = TTS("tts_models/multilingual/multi-dataset/your_tts")
+    current_engine = "Backup Engine (your_tts)"
+    print("✅ Backup Engine Loaded Successfully!")
+def generate_api_voice(text, reference_audio):
+    if not text or not reference_audio:
+        return None, "⚠️ Error: Script aur Voice dono zaroori hain bhai!"
     try:
+        # Clean background noise from the reference audio
+        data, rate = sf.read(reference_audio)
+        if len(data.shape) > 1: data = data.mean(axis=1)
+        clean_data = nr.reduce_noise(y=data, sr=rate)
+        sf.write("clean_ref.wav", clean_data, rate)
+        output_file = "output_voice.wav"
+        # Generate the voice with whichever engine managed to load
+        tts.tts_to_file(text=text, speaker_wav="clean_ref.wav", language="en", file_path=output_file)
+        return output_file, f"✅ Success (Powered by: {current_engine})"
     except Exception as e:
+        return None, f"⚠️ Generation Error: {str(e)}"
+# A clean, minimal user interface
+iface = gr.Interface(
+    fn=generate_api_voice,
+    inputs=[
+        gr.Textbox(label="Apni Script Yahan Likho"),
+        gr.Audio(type="filepath", label="Voice Yahan Upload Kar")
+    ],
+    outputs=[
+        gr.Audio(label="VoiceForge Output"),
+        gr.Textbox(label="API Status")
+    ],
+    title="🎙️ VoiceForge AI Studio"
+)
+iface.launch()