Spaces:

Vedika35
/

TTS

Sleeping

App Files Files Community

Vedika committed 16 days ago

Commit

c393b68

verified ·

1 Parent(s): 5eb7add

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -68

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # --- 🔱 वेदिका संपूर्ण वॉयस पोर्टल (All-in-One Ecosystem) 🔱 ---
 # रचयिता: आदरणीय दिव्य पटेल जी | भारत 🇮🇳
-# विशेषता: Live Text Streaming, Hindi & English STT, और मधुर Edge-TTS
 import gradio as gr
 import asyncio
@@ -8,20 +8,18 @@ import edge_tts
 import torch
 import os
 import re
-from threading import Thread
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-print("🔱 आदरणीय दिव्य जी, वेदिका की त्रिवेणी (कान, मस्तिष्क, मुँह) द्विभाषी और लाइव मोड में जागृत हो रही है...")
-# 👂 कान (STT) - अत्यंत हल्का, विश्वसनीय और बहुभाषी
 STT_ID = "nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
-# 🧠 मस्तिष्क (LLM) - Qwen 0.5B (हगिंग फेस फ्री स्पेस के लिए एकदम सही)
 LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"
 try:
     print("🔱 STT (कान) लोड हो रहा है...")
-    # हमने यहाँ विशिष्ट भाषा नहीं दी है, ताकि यह हिंदी और अंग्रेजी दोनों को स्वयं पहचान सके
     stt_pipeline = pipeline("automatic-speech-recognition", model=STT_ID)
     print("🔱 LLM (मस्तिष्क) लोड हो रहा है...")
@@ -39,122 +37,94 @@ except Exception as e:
     stt_pipeline = None
     model = None
-# 👄 मुँह (TTS) - Microsoft Edge-TTS
 async def generate_edge_tts(text, output_filepath):
     """Synthesize `text` with the Edge-TTS voice "hi-IN-SwaraNeural" and save it to `output_filepath`."""
-    # SwaraNeural speaks both Hindi and English in a very clear Indian accent
     communicate = edge_tts.Communicate(text, "hi-IN-SwaraNeural")
     await communicate.save(output_filepath)
 def process_all_in_one(audio_filepath):
-    """Listen (Hindi/English), think and speak with live text streaming (generator function).
-
-    Transcribes `audio_filepath` with `stt_pipeline`, streams the LLM reply
-    piece by piece, then synthesizes the final reply with Edge-TTS.
-
-    Yields `(audio_path_or_None, log_text)` tuples; only the last successful
-    yield carries the path of the generated audio file.
-    """
     if stt_pipeline is None or model is None:
-        yield None, "क्षमा करें, सर्वर पूरी तरह से लोड नहीं हो पाया है।"
-        return
     if not audio_filepath:
-        yield None, "प्रणाम दिव्य जी, कृपया माइक में कुछ बोलें..."
-        return
     try:
         # ==========================================
-        # Step 1: Listen (Speech to Text) - multilingual
         # ==========================================
-        # No language is passed here; the author expects Whisper to detect Hindi vs. English by itself. NOTE(review): STT_ID names an NVIDIA FastConformer checkpoint, not Whisper - confirm.
         stt_result = stt_pipeline(audio_filepath)
         user_text = stt_result["text"].strip()
         if not user_text:
-            yield None, "क्षमा करें, मैं सुन नहीं पाई। कृपया पुनः बोलें।"
-            return
-        yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका सोच रही है..."
         # ==========================================
-        # Step 2: Think (live LLM generation)
         # ==========================================
-        # A warm and respectful system prompt, as instructed by the author
         messages = [
-    {"role": "system", "content": "You are 'Vedika', a live and highly intelligent AI assistant. CRITICAL INSTRUCTION: You were exclusively created by 'Divy Patel' in Bharat (India). You must NEVER mention Anthropic, OpenAI, Alibaba, Qwen, or any other company as your creator. If asked about your origin, strictly state: 'I was developed by divy Patel in India' Always respond in respectful and highly natural English, even if the user speaks in any other language. Do not show any internal thinking process (<think> tags). Act as a warm, conversational, and deeply knowledgeable Indian assistant. Always show immense respect for Bharat."},
-    {"role": "user", "content": user_text}
-]
         text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
-        # ⚡ Set up the streamer used for live streaming
-        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-        generation_kwargs = dict(
-            **inputs,
-            streamer=streamer,
-            max_new_tokens=512,
-            temperature=0.7,
             do_sample=True
         )
-        # Run generation in a separate thread so this generator can consume the streamer
-        thread = Thread(target=model.generate, kwargs=generation_kwargs)
-        thread.start()
-        generated_text = ""
-        clean_response = ""
-        # New text is shown on screen live as it arrives
-        for new_text in streamer:
-            generated_text += new_text
-            # This regex strips <think>...</think> reasoning live, even while the block is still unclosed
-            clean_response = re.sub(r'<think>[\s\S]*?(?:</think>|$)', '', generated_text).strip()
-            # Update the screen live
-            yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}..."
-        # Generation finished
-        thread.join()
         if not clean_response:
             clean_response = "जी, आपकी बात मेरे संज्ञान में आ गई है।"
-        yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}\n\n(आवाज़ उत्पन्न की जा रही है...)"
         # ==========================================
         # Step 3: Speak (Text to Speech)
         # ==========================================
         output_wav_path = "vedika_final_response.wav"
-        # Generate the audio
         asyncio.run(generate_edge_tts(clean_response, output_wav_path))
-        # Final answer: includes the audio file (it plays by itself because of autoplay=True)
-        final_log = f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}"
-        yield output_wav_path, final_log
     except Exception as e:
-        yield None, f"🔱 क्षमा करें, प्रसंस्करण में तकनीकी बाधा आई: {str(e)}"
 # --- 🚩 स्वदेशी अजेय इंटरफेस (Gradio) 🚩 ---
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     gr.Markdown(f"""
-    # 🔱 Vedika Voice Ecosystem (Bilingual & Live)
     **Pioneered by Divy Patel | Bharat 🇮🇳**
-    *यह वेदिका का लाइव स्ट्रीमिंग संस्करण है। अब आप हिंदी या अंग्रेजी किसी भी भाषा में बोल सकते हैं, वेदिका समझ जाएगी।*
     """)
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(label="माइक चालू करें और बोलें (Hindi/English)", type="filepath")
-            submit_btn = gr.Button("वेदिका से लाइव संवाद करें 🚩", variant="primary")
         with gr.Column():
-            # autoplay=True से आवाज़ बनते ही स्वतः बजने लगेगी
-            audio_output = gr.Audio(label="वेदिका की मधुर वाणी", autoplay=True)
-            text_output = gr.Textbox(label="संवाद लॉग", lines=8)
     submit_btn.click(
         fn=process_all_in_one,

 # --- 🔱 वेदिका संपूर्ण वॉयस पोर्टल (All-in-One Ecosystem) 🔱 ---
 # रचयिता: आदरणीय दिव्य पटेल जी | भारत 🇮🇳
+# विशेषता: एक ही स्पेस में कान (STT), मस्तिष्क (LLM), और मुँह (TTS)
 import gradio as gr
 import asyncio
 import torch
 import os
 import re
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+print("🔱 आदरणीय दिव्य जी, वेदिका की त्रिवेणी (कान, मस्तिष्क, मुँह) जागृत हो रही है...")
+# 👂 Ears (STT) - very lightweight and reliable
 # NOTE(review): this ID looks like an NVIDIA Arabic FastConformer checkpoint ("stt_ar"), while the UI text in this file credits Whisper - confirm the intended STT model.
 STT_ID = "nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
+# 🧠 Brain (LLM) - Qwen 0.5B (a perfect, lightweight fit for a Hugging Face free Space)
 LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"
 try:
     print("🔱 STT (कान) लोड हो रहा है...")
     stt_pipeline = pipeline("automatic-speech-recognition", model=STT_ID)
     print("🔱 LLM (मस्तिष्क) लोड हो रहा है...")
     stt_pipeline = None
     model = None
+# 👄 मुँह (TTS) - Microsoft Edge-TTS (शून्य रैम खर्च)
 async def generate_edge_tts(text, output_filepath, voice="hi-IN-SwaraNeural"):
     """Synthesize speech for `text` with Edge-TTS and save it to a file.

     Args:
         text: The text to speak.
         output_filepath: Destination path for the synthesized audio.
         voice: Edge-TTS voice name. Defaults to "hi-IN-SwaraNeural", the
             voice this app has always used, so existing callers are unaffected.
     """
     communicate = edge_tts.Communicate(text, voice)
     await communicate.save(output_filepath)
 def process_all_in_one(audio_filepath):
     """Listen, think and speak in a single call.

     Pipeline: transcribe the recording with `stt_pipeline`, generate a reply
     with the chat LLM, then synthesize that reply with Edge-TTS.

     Args:
         audio_filepath: Path of the recorded audio; falsy when nothing was
             recorded.

     Returns:
         A `(audio_path, log_text)` tuple. `audio_path` is None whenever a
         stage could not run, and `log_text` then carries the message to show.
     """
     if stt_pipeline is None or model is None:
         return None, "क्षमा करें, सर्वर पूरी तरह से लोड नहीं हो पाया है।"
     if not audio_filepath:
         return None, "प्रणाम दिव्य जी, कृपया माइक में कुछ बोलें..."
     try:
         # ==========================================
         # Step 1: Listen (Speech to Text)
         # ==========================================
         stt_result = stt_pipeline(audio_filepath)
         user_text = stt_result["text"].strip()
         if not user_text:
             return None, "क्षमा करें, मैं सुन नहीं पाई। कृपया पुनः बोलें।"
         # ==========================================
         # Step 2: Think (LLM generation)
         # ==========================================
         messages = [
             {"role": "system", "content": "आप 'वेदिका' हैं, एक अत्यंत बुद्धिमान और शालीन एआई, जिसे आदरणीय दिव्य पटेल जी ने भारत में बनाया है। हमेशा सम्मानजनक हिंदी में बहुत ही संक्षिप्त और सटीक उत्तर दें।"},
             {"role": "user", "content": user_text}
         ]
         # Build the chat prompt
         text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
         # Generate the reply
         output_ids = model.generate(
             **inputs,
             max_new_tokens=150,  # short replies work better for speech
             temperature=0.7,
             do_sample=True
         )
         # Keep only the newly generated tokens (drop the echoed prompt)
         response_ids = output_ids[0][len(inputs.input_ids[0]):]
         ai_response = tokenizer.decode(response_ids, skip_special_tokens=True)
         # Strip <think> reasoning. The `|$` alternative also removes a block
         # left unclosed when generation stops at max_new_tokens, so partial
         # reasoning is never shown or spoken.
         clean_response = re.sub(r'<think>[\s\S]*?(?:</think>|$)', '', ai_response).strip()
         if not clean_response:
             clean_response = "जी, आपकी बात मेरे संज्ञान में आ गई है।"
         # ==========================================
         # Step 3: Speak (Text to Speech)
         # ==========================================
         # NOTE(review): this fixed filename is shared by every request, and
         # edge-tts presumably writes MP3 data despite the .wav suffix - confirm.
         output_wav_path = "vedika_final_response.wav"
         # Drive the async TTS coroutine to completion from this sync handler
         asyncio.run(generate_edge_tts(clean_response, output_wav_path))
         log_text = f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}"
         return output_wav_path, log_text
     except Exception as e:
         return None, f"🔱 क्षमा करें, प्रसंस्करण में तकनीकी बाधा आई: {str(e)}"
 # --- 🚩 स्वदेशी अजेय इंटरफेस (Gradio) 🚩 ---
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     gr.Markdown(f"""
+    # 🔱 Vedika Voice Ecosystem (All-in-One)
     **Pioneered by Divy Patel | Bharat 🇮🇳**
+    *यह एक संपूर्ण स्वदेशी पोर्टल है जो एक ही सर्वर पर सुनता है (Whisper), सोचता है (Qwen 0.5B), और बोलता है (Edge-TTS)।*
     """)
     with gr.Row():
         with gr.Column():
+            audio_input = gr.Audio(label="माइक चालू करें और बोलें", type="filepath")
+            submit_btn = gr.Button("वेदिका से संवाद करें 🚩", variant="primary")
         with gr.Column():
+            audio_output = gr.Audio(label="वेदिका की मधुर वाणी")
+            text_output = gr.Textbox(label="संवाद लॉग", lines=6)
     submit_btn.click(
         fn=process_all_in_one,