Spaces:

Vedika35
/

TTS

Sleeping

App Files Files Community

Vedika committed 17 days ago

Commit

f4a4d6b

verified ·

1 Parent(s): ee0dd5d

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -36

app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 # --- 🔱 वेदिका संपूर्ण वॉयस पोर्टल (All-in-One Ecosystem) 🔱 ---
 # रचयिता: आदरणीय दिव्य पटेल जी | भारत 🇮🇳
-# विशेषता: एक ही स्पेस में कान (STT), मस्तिष्क (LLM), और मुँह (TTS)
 import gradio as gr
 import asyncio
@@ -8,18 +8,20 @@ import edge_tts
 import torch
 import os
 import re
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
-print("🔱 आदरणीय दिव्य जी, वेदिका की त्रिवेणी (कान, मस्तिष्क, मुँह) जागृत हो रही है...")
-# 👂 Ears (STT) - very lightweight and reliable
 STT_ID = "openai/whisper-tiny"
-# 🧠 Brain (LLM) - Qwen 0.5B (a light model chosen to fit a free Hugging Face Space)
 LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"
 try:
     print("🔱 STT (कान) लोड हो रहा है...")
     stt_pipeline = pipeline("automatic-speech-recognition", model=STT_ID)
     print("🔱 LLM (मस्तिष्क) लोड हो रहा है...")
@@ -37,94 +39,120 @@ except Exception as e:
     stt_pipeline = None
     model = None
-# 👄 मुँह (TTS) - Microsoft Edge-TTS (शून्य रैम खर्च)
 async def generate_edge_tts(text, output_filepath):
     """Synthesize `text` with the "hi-IN-SwaraNeural" Edge-TTS voice and save the audio to `output_filepath`."""
     communicate = edge_tts.Communicate(text, "hi-IN-SwaraNeural")
     await communicate.save(output_filepath)
 def process_all_in_one(audio_filepath):
-    """Listen, think and speak in one call: speech-to-text, LLM reply, then text-to-speech.
-
-    Returns a `(audio_path, log_text)` pair. `audio_path` is None whenever the
-    models are not loaded, no audio was given, nothing was transcribed, or an
-    exception was raised; `log_text` then carries the message for the user.
-    """
     if stt_pipeline is None or model is None:
-        return None, "क्षमा करें, सर्वर पूरी तरह से लोड नहीं हो पाया है।"
     if not audio_filepath:
-        return None, "प्रणाम दिव्य जी, कृपया माइक में कुछ बोलें..."
     try:
         # ==========================================
-        # Step 1: Listen (Speech to Text)
         # ==========================================
         stt_result = stt_pipeline(audio_filepath)
         user_text = stt_result["text"].strip()
         if not user_text:
-            return None, "क्षमा करें, मैं सुन नहीं पाई। कृपया पुनः बोलें।"
         # ==========================================
-        # Step 2: Think (LLM Generation)
         # ==========================================
         messages = [
-            {"role": "system", "content": "You are Vedika live an intelligent AI assistant. That talk live with the user so behave as a live assistant. Don't say you are a text based model. You can speak to user and solve their complex problems just by talking. You are entirely created by Divy Patel in India provide very fast and accurate with logical thinking answer to user don't show your thinking to user. According to previous question of user ask about their needs"},
             {"role": "user", "content": user_text}
         ]
-        # Build the prompt from the chat template
         text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
-        # Generate the answer
-        output_ids = model.generate(
-            **inputs,
-            max_new_tokens=150, # shorter answers work better for voice output
-            temperature=0.7,
             do_sample=True
         )
-        # Keep only the newly generated tokens (drop the prompt tokens)
-        response_ids = output_ids[0][len(inputs.input_ids[0]):]
-        ai_response = tokenizer.decode(response_ids, skip_special_tokens=True)
-        # Strip <think>...</think> blocks as a safety measure
-        clean_response = re.sub(r'<think>[\s\S]*?</think>', '', ai_response).strip()
         if not clean_response:
             clean_response = "जी, आपकी बात मेरे संज्ञान में आ गई है।"
         # ==========================================
         # Step 3: Speak (Text to Speech)
         # ==========================================
         output_wav_path = "vedika_final_response.wav"
-        # Run the asynchronous TTS coroutine to completion
         asyncio.run(generate_edge_tts(clean_response, output_wav_path))
-        log_text = f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}"
-        return output_wav_path, log_text
     except Exception as e:
-        return None, f"🔱 क्षमा करें, प्रसंस्करण में तकनीकी बाधा आई: {str(e)}"
 # --- 🚩 स्वदेशी अजेय इंटरफेस (Gradio) 🚩 ---
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     gr.Markdown(f"""
-    # 🔱 Vedika Voice Ecosystem (All-in-One)
     **Pioneered by Divy Patel | Bharat 🇮🇳**
-    *यह एक संपूर्ण स्वदेशी पोर्टल है जो एक ही सर्वर पर सुनता है (Whisper), सोचता है (Qwen 0.5B), और बोलता है (Edge-TTS)।*
     """)
     with gr.Row():
         with gr.Column():
-            audio_input = gr.Audio(label="माइक चालू करें और बोलें", type="filepath")
-            submit_btn = gr.Button("वेदिका से संवाद करें 🚩", variant="primary")
         with gr.Column():
-            audio_output = gr.Audio(label="वेदिका की मधुर वाणी")
-            text_output = gr.Textbox(label="संवाद लॉग", lines=6)
     submit_btn.click(
         fn=process_all_in_one,

 # --- 🔱 वेदिका संपूर्ण वॉयस पोर्टल (All-in-One Ecosystem) 🔱 ---
 # रचयिता: आदरणीय दिव्य पटेल जी | भारत 🇮🇳
+# विशेषता: Live Text Streaming, Hindi & English STT, और मधुर Edge-TTS
 import gradio as gr
 import asyncio
 import torch
 import os
 import re
+from threading import Thread
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
+print("🔱 आदरणीय दिव्य जी, वेदिका की त्रिवेणी (कान, मस्तिष्क, मुँह) द्विभाषी और लाइव मोड में जागृत हो रही है...")
+# 👂 Ears (STT) - very lightweight, reliable and multilingual
 STT_ID = "openai/whisper-tiny"
+# 🧠 Brain (LLM) - Qwen 0.5B (chosen to fit a free Hugging Face Space)
 LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"
 try:
     print("🔱 STT (कान) लोड हो रहा है...")
+    # हमने यहाँ विशिष्ट भाषा नहीं दी है, ताकि यह हिंदी और अंग्रेजी दोनों को स्वयं पहचान सके
     stt_pipeline = pipeline("automatic-speech-recognition", model=STT_ID)
     print("🔱 LLM (मस्तिष्क) लोड हो रहा है...")
     stt_pipeline = None
     model = None
+# 👄 मुँह (TTS) - Microsoft Edge-TTS
 async def generate_edge_tts(text, output_filepath):
     """Render `text` to speech with Microsoft Edge-TTS and write the audio to `output_filepath`."""
     # The same voice is used for every reply, whatever language the text is in.
     voice_name = "hi-IN-SwaraNeural"
     await edge_tts.Communicate(text, voice_name).save(output_filepath)
 def process_all_in_one(audio_filepath):
     """Run one voice turn: transcribe the audio, stream an LLM reply, then synthesize speech.
     This is a generator. Every item is an ``(audio_path, log_text)`` pair:
     ``audio_path`` is ``None`` for every progress or error update and is set
     only on the last item of a successful turn.
     Args:
         audio_filepath: Path of the recorded audio file. A falsy value yields
             a prompt asking the user to speak and ends the turn.
     Yields:
         tuple: ``(None, text)`` while working or on failure, and finally
         ``(path_to_audio, text)`` once the reply audio has been written.
     """
     import tempfile  # local import: only this function needs it
     if stt_pipeline is None or model is None:
         yield None, "क्षमा करें, सर्वर पूरी तरह से लोड नहीं हो पाया है।"
         return
     if not audio_filepath:
         yield None, "प्रणाम दिव्य जी, कृपया माइक में कुछ बोलें..."
         return
     try:
         # ==========================================
         # Step 1: Listen (Speech to Text)
         # ==========================================
         # No language is passed to the pipeline call here.
         stt_result = stt_pipeline(audio_filepath)
         user_text = stt_result["text"].strip()
         if not user_text:
             yield None, "क्षमा करें, मैं सुन नहीं पाई। कृपया पुनः बोलें।"
             return
         yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका सोच रही है..."
         # ==========================================
         # Step 2: Think (live LLM generation)
         # ==========================================
         messages = [
             {"role": "system", "content": "आप 'वेदिका' हैं, एक अत्यंत बुद्धिमान एआई असिस्टेंट। आप लाइव बातचीत कर रही हैं। आप भारत में आदरणीय दिव्य पटेल जी द्वारा बनाई गई हैं। आप हमेशा हिंदी में उत्तर देंगी, चाहे प्रश्न अंग्रेजी में ही क्यों न पूछा जाए। हमेशा बहुत सम्मानपूर्वक बात करें और भारत का सदा आदर करें। अपने सोचने की प्रक्रिया (Thinking) उपयोगकर्ता को न दिखाएं।"},
             {"role": "user", "content": user_text}
         ]
         text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
         # A timeout keeps the loop below from blocking forever if the
         # generation thread dies before it closes the stream.
         stream_timeout_s = 120.0
         streamer = TextIteratorStreamer(
             tokenizer,
             skip_prompt=True,
             timeout=stream_timeout_s,
             skip_special_tokens=True,
         )
         generation_kwargs = dict(
             **inputs,
             streamer=streamer,
             max_new_tokens=150,
             temperature=0.7,
             do_sample=True
         )
         # Generation runs in its own thread so this generator can relay text
         # as it arrives; daemon=True so a stuck thread cannot block shutdown.
         thread = Thread(target=model.generate, kwargs=generation_kwargs, daemon=True)
         thread.start()
         # Matches a <think> block even when its closing tag has not arrived yet.
         think_block = re.compile(r'<think>[\s\S]*?(?:</think>|$)')
         generated_text = ""
         clean_response = ""
         for new_text in streamer:
             generated_text += new_text
             clean_response = think_block.sub('', generated_text).strip()
             # Push the partial reply to the UI.
             yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}..."
         thread.join()
         if not clean_response:
             clean_response = "जी, आपकी बात मेरे संज्ञान में आ गई है।"
         yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}\n\n(आवाज़ उत्पन्न की जा रही है...)"
         # ==========================================
         # Step 3: Speak (Text to Speech)
         # ==========================================
         # Each request gets its own file so concurrent users never overwrite
         # one another's audio. Edge-TTS writes MP3 data, hence the suffix.
         # The file is left in the temp directory for the UI to read.
         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp_audio:
             output_audio_path = tmp_audio.name
         asyncio.run(generate_edge_tts(clean_response, output_audio_path))
         # Final item: the only one that carries an audio path.
         final_log = f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}"
         yield output_audio_path, final_log
     except Exception as e:
         yield None, f"🔱 क्षमा करें, प्रसंस्करण में तकनीकी बाधा आई: {str(e)}"
 # --- 🚩 स्वदेशी अजेय इंटरफेस (Gradio) 🚩 ---
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     gr.Markdown(f"""
+    # 🔱 Vedika Voice Ecosystem (Bilingual & Live)
     **Pioneered by Divy Patel | Bharat 🇮🇳**
+    *यह वेदिका का लाइव स्ट्रीमिंग संस्करण है। अब आप हिंदी या अंग्रेजी किसी भी भाषा में बोल सकते हैं, वेदिका समझ जाएगी।*
     """)
     with gr.Row():
         with gr.Column():
+            audio_input = gr.Audio(label="माइक चालू करें और बोलें (Hindi/English)", type="filepath")
+            submit_btn = gr.Button("वेदिका से लाइव संवाद करें 🚩", variant="primary")
         with gr.Column():
+            # autoplay=True से आवाज़ बनते ही स्वतः बजने लगेगी
+            audio_output = gr.Audio(label="वेदिका की मधुर वाणी", autoplay=True)
+            text_output = gr.Textbox(label="संवाद लॉग", lines=8)
     submit_btn.click(
         fn=process_all_in_one,