Namanrai committed on
Commit
3ec8508
·
verified ·
1 Parent(s): 5ba20b4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -130
app.py CHANGED
@@ -1,137 +1,65 @@
1
  import os
2
- import gc
3
- import torch
4
  import gradio as gr
 
 
5
  import soundfile as sf
6
- import tempfile
7
 
 
8
  gc.collect()
9
- if torch.cuda.is_available():
10
- torch.cuda.empty_cache()
11
-
12
- print("⏳ Qwen3-TTS loading...")
13
-
14
- model = None
15
-
16
- def load_model():
17
- global model
18
- if model is not None:
19
- return True
20
- try:
21
- from qwen_tts import Qwen3TTSModel
22
- model = Qwen3TTSModel.from_pretrained(
23
- "Qwen/Qwen3-TTS-12Hz-0.6B-Base",
24
- device_map="cuda" if torch.cuda.is_available() else "cpu",
25
- dtype=torch.bfloat16,
26
- )
27
- print("βœ… Qwen3-TTS Loaded!")
28
- return True
29
- except Exception as e:
30
- print(f"❌ Load Error: {e}")
31
- return False
32
-
33
-
34
- def generate_voice(text, reference_audio, ref_transcript, language):
35
- if not text or text.strip() == "":
36
- return None, "⚠️ Text khali hai! Kuch likho."
37
-
38
- if reference_audio is None:
39
- return None, "⚠️ Apni awaaz ka audio upload karo (3-10 sec)."
40
-
41
- if not load_model():
42
- return None, "❌ Model load nahi hua. GPU Space use kar raha hai? T4 GPU select karo."
43
-
44
  try:
45
- output_path = tempfile.mktemp(suffix=".wav")
46
-
47
- # ref_transcript optional hai β€” agar nahi diya toh None pass karo
48
- transcript = ref_transcript.strip() if ref_transcript and ref_transcript.strip() else None
49
-
50
- wavs, sr = model.generate_voice_clone(
51
- text=text,
52
- language=language,
53
- ref_audio=reference_audio,
54
- ref_text=transcript, # None hoga toh model khud guess karega
55
- )
56
-
57
- sf.write(output_path, wavs[0], sr)
58
- return output_path, "βœ… Awaaz ban gayi! Neeche play/download karo."
59
-
60
- except torch.cuda.OutOfMemoryError:
61
- return None, "❌ GPU RAM full! Chhota text try karo (100 words tak)."
62
  except Exception as e:
63
- err = str(e)
64
- print(f"❌ Error: {err}")
65
- if "ffmpeg" in err.lower() or "audio" in err.lower():
66
- return None, "❌ Audio format issue. WAV file upload karo."
67
- return None, f"❌ Error: {err}"
68
-
69
-
70
- # ── UI ──────────────────────────────────────────────────────────────────────
71
-
72
- LANGUAGES = [
73
- "English", "Chinese", "Japanese", "Korean",
74
- "German", "French", "Russian", "Portuguese",
75
- "Spanish", "Italian"
76
- ]
77
-
78
- with gr.Blocks(title="πŸŽ™οΈ VibeVoice – Qwen3 TTS") as iface:
79
-
80
- gr.HTML("""
81
- <h1 style='text-align:center; color:#7c3aed;'>πŸŽ™οΈ VibeVoice – Qwen3 TTS</h1>
82
- <p style='text-align:center; color:#6b7280;'>
83
- Apni awaaz upload karo β†’ Text likho β†’ AI teri awaaz mein bolega<br>
84
- <small>Powered by Qwen3-TTS-0.6B Β· Real Voice Cloning</small>
85
- </p>
86
- """)
87
-
88
- with gr.Row():
89
- with gr.Column():
90
- gr.Markdown("### πŸ“ Step 1 – Text")
91
- text_input = gr.Textbox(
92
- label="Jo bolwana hai",
93
- placeholder="Namaste! Yeh meri awaaz hai jo AI ne clone ki hai.",
94
- lines=4,
95
- )
96
- lang_dropdown = gr.Dropdown(
97
- choices=LANGUAGES,
98
- value="English",
99
- label="🌐 Language",
100
- )
101
-
102
- with gr.Column():
103
- gr.Markdown("### 🎀 Step 2 – Voice Sample")
104
- audio_input = gr.Audio(
105
- source="upload",
106
- type="filepath",
107
- label="Apni awaaz upload karo (3–10 sec, saaf audio)",
108
- )
109
- ref_text_input = gr.Textbox(
110
- label="Reference Audio ka text (optional, lekin doge toh quality better hogi)",
111
- placeholder="Jo tumne us audio mein bola tha...",
112
- lines=2,
113
- )
114
-
115
- submit_btn = gr.Button("πŸš€ Generate Voice", variant="primary")
116
-
117
- gr.Markdown("### πŸ”Š Result")
118
- audio_output = gr.Audio(label="Generated Voice")
119
- status_output = gr.Textbox(label="Status", interactive=False)
120
-
121
- submit_btn.click(
122
- fn=generate_voice,
123
- inputs=[text_input, audio_input, ref_text_input, lang_dropdown],
124
- outputs=[audio_output, status_output],
125
- )
126
-
127
- gr.Markdown("""
128
- ---
129
- πŸ’‘ **Tips:**
130
- - GPU Space use karo (T4 free tier chalega)
131
- - Reference audio: 5-10 second, quiet room, WAV format best hai
132
- - Ref text doge toh cloning zyada accurate hogi
133
- - Pehli baar thoda slow hoga (model download), baad mein fast
134
- """)
135
-
136
- if __name__ == "__main__":
137
- iface.launch(server_name="0.0.0.0", server_port=7860)
 
1
  import os
 
 
2
  import gradio as gr
3
+ from TTS.api import TTS
4
+ import noisereduce as nr
5
  import soundfile as sf
6
+ import gc
7
 
8
+ # Memory aur threads optimize kar rahe hain CPU ke liye
9
  gc.collect()
10
+ os.environ['COQUI_TOS_AGREED'] = '1'
11
+ os.environ['OMP_NUM_THREADS'] = '4'
12
+
13
+ print("⏳ Loading Final Boss: NeuTTS Air Q4...")
14
+
15
+ tts = None
16
+ current_engine = "None"
17
+
18
+ # 🔄 THE AUTO-SWITCH SYSTEM
19
+ try:
20
+ # Pehle NeuTTS (XTTS) load karne ki koshish
21
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2")
22
+ current_engine = "NeuTTS (Pro Quality)"
23
+ print("βœ… NeuTTS Successfully Loaded!")
24
+ except Exception as e:
25
+ # Agar RAM crash hui, toh chup-chaap Backup Engine chala do
26
+ print(f"⚠️ NeuTTS Load Failed. Auto-Switching to Backup Engine... Error: {e}")
27
+ tts = TTS("tts_models/multilingual/multi-dataset/your_tts")
28
+ current_engine = "Backup Engine (your_tts)"
29
+ print("βœ… Backup Engine Loaded Successfully!")
30
+
31
+ def generate_api_voice(text, reference_audio):
32
+ if not text or not reference_audio:
33
+ return None, "⚠️ Error: Script aur Voice dono zaroori hain bhai!"
34
+
 
 
 
 
 
 
 
 
 
 
35
  try:
36
+ # Background noise ki safai
37
+ data, rate = sf.read(reference_audio)
38
+ if len(data.shape) > 1: data = data.mean(axis=1)
39
+ clean_data = nr.reduce_noise(y=data, sr=rate)
40
+ sf.write("clean_ref.wav", clean_data, rate)
41
+
42
+ output_file = "output_voice.wav"
43
+
44
+ # Aawaz banana (Jo bhi engine zinda bacha ho usse)
45
+ tts.tts_to_file(text=text, speaker_wav="clean_ref.wav", language="en", file_path=output_file)
46
+
47
+ return output_file, f"βœ… Success (Powered by: {current_engine})"
 
 
 
 
 
48
  except Exception as e:
49
+ return None, f"⚠️ Generation Error: {str(e)}"
50
+
51
+ # Ekdum saaf User Interface
52
+ iface = gr.Interface(
53
+ fn=generate_api_voice,
54
+ inputs=[
55
+ gr.Textbox(label="Apni Script Yahan Likho"),
56
+ gr.Audio(type="filepath", label="Voice Yahan Upload Kar")
57
+ ],
58
+ outputs=[
59
+ gr.Audio(label="VoiceForge Output"),
60
+ gr.Textbox(label="API Status")
61
+ ],
62
+ title="πŸŽ™οΈ VoiceForge AI Studio"
63
+ )
64
+
65
+ iface.launch()