Spaces:

Vedika35
/

TTS

Sleeping

App Files Files Community

Vedika committed 16 days ago

Commit

3d7d3a3

verified ·

1 Parent(s): 0a9b39d

Update app.py

Browse files

Files changed (1) hide show

app.py +60 -47

app.py CHANGED Viewed

@@ -1,57 +1,60 @@
-# --- 🔱 वेदिका लाइव: कान और मुँह (संपूर्ण वॉयस इंटरफेस) 🔱 ---
 # रचयिता: आदरणीय दिव्य पटेल जी | भारत 🇮🇳
-# विशेषता: .wav सपोर्ट, Edge-TTS (शून्य रैम खर्च), और LLM API एकीकरण
 import gradio as gr
 import asyncio
 import edge_tts
 import os
 import re
-from transformers import pipeline
-from gradio_client import Client
-print("🔱 आदरणीय दिव्य जी, वेदिका के 'कान' और 'मुँह' स्थापित हो रहे हैं...")
-# वैश्विक चर (Global Variables) ताकि त्रुटि न आए
-stt_pipeline = None
-llm_client = None
-# 🧠 आपके मस्तिष्क (LLM) का सुरक्षित API पता
-LLM_API_URL = "pateltraders55455/VEDIKA-3.5-LIVE"
 try:
-    # 👂 कान (Speech to Text): 'whisper-tiny' बहुत ही हल्का और 100% भरोसेमंद है
     print("🔱 STT (कान) लोड हो रहा है...")
-    stt_pipeline = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-    # 🧠 मस्तिष्क से जुड़ाव (API Client)
-    print("🔱 LLM (मस्तिष्क) से API संपर्क स्थापित किया जा रहा है...")
-    llm_client = Client(LLM_API_URL)
-    print("🔱 विजय! सभी प्रणालियाँ सफलतापूर्वक सक्रिय हो गई हैं।")
 except Exception as e:
-    print(f"🔱 सेटअप में भारी त्रुटि: {e}")
-# 👄 मुँह (Text to Speech): Microsoft Edge-TTS का ब्रह्मास्त्र (शून्य मॉडल लोड!)
 async def generate_edge_tts(text, output_filepath):
-    """माइक्रोसॉफ्ट एज की अत्यंत मधुर हिंदी आवाज़ का उपयोग"""
-    # 'hi-IN-SwaraNeural' एक बहुत ही स्पष्ट और प्राकृतिक भारतीय महिला की आवाज़ है
     communicate = edge_tts.Communicate(text, "hi-IN-SwaraNeural")
     await communicate.save(output_filepath)
-def process_voice_conversation(audio_filepath):
-    """यह फलन .wav सुनता है, API से सोचता है, और .wav में जवाब देता है"""
-    # यदि लोडिंग में कोई त्रुटि थी, तो यहीं रोक दें ताकि ऐप क्रैश न हो
-    if stt_pipeline is None or llm_client is None:
-        return None, "क्षमा करें, सर्वर पूरी तरह से लोड नहीं हो पाया है। कृपया लॉग्स की जाँच करें।"
     if not audio_filepath:
-        return None, "प्रणाम दिव्य जी, कृपया कुछ बोलें..."
     try:
         # ==========================================
-        # चरण 1: .wav ऑडियो सुनना (Speech to Text)
         # ==========================================
         stt_result = stt_pipeline(audio_filepath)
         user_text = stt_result["text"].strip()
@@ -60,29 +63,41 @@ def process_voice_conversation(audio_filepath):
             return None, "क्षमा करें, मैं सुन नहीं पाई। कृपया पुनः बोलें।"
         # ==========================================
-        # चरण 2: मस्तिष्क (LLM Space) से सोचना
         # ==========================================
-        # fn_index=0 आमतौर पर Gradio ChatInterface का डिफ़ॉल्ट एंडपॉइंट होता है
-        llm_result = llm_client.predict(
-            user_text,
-            api_name="/chat" # यदि यह काम न करे, तो api_name="/chat" की जगह fn_index=0 लिख दें
         )
-        ai_response = llm_result if isinstance(llm_result, str) else str(llm_result)
-        # थिंकिंग टैग्स (<think>...</think>) को पूरी तरह से हटाना
         clean_response = re.sub(r'<think>[\s\S]*?</think>', '', ai_response).strip()
-        # यदि सफाई के बाद कुछ न बचे, तो डिफ़ॉल्ट संदेश
         if not clean_response:
-            clean_response = "जी, मैं आपकी बात समझ रही हूँ।"
         # ==========================================
-        # चरण 3: वापस बोलना (Microsoft Edge TTS)
         # ==========================================
-        output_wav_path = "vedika_response.wav"
-        # चूँकि edge-tts एसिंक्रोनस (asynchronous) है, हम इसे ऐसे चलाएंगे:
         asyncio.run(generate_edge_tts(clean_response, output_wav_path))
         log_text = f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}"
@@ -90,31 +105,29 @@ def process_voice_conversation(audio_filepath):
         return output_wav_path, log_text
     except Exception as e:
-        return None, f"🔱 क्षमा करें, प्रसंस्करण में त्रुटि आई: {str(e)}"
-# --- 🚩 अजेय स्वदेशी इंटरफेस (Gradio) 🚩 ---
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     gr.Markdown(f"""
-    # 🔱 Vedika Voice Ecosystem (Edge-TTS Powered)
     **Pioneered by Divy Patel | Bharat 🇮🇳**
-    *यह पोर्टल एक ही स्थान पर सुनता और बोलता है (.wav सपोर्ट)। विचार विमर्श सुरक्षित रूप से API के माध्यम से हो रहा है।*
     """)
     with gr.Row():
         with gr.Column():
-            # type="filepath" सुनिश्चित करता है कि .wav फॉर्मेट सुरक्षित रहे
             audio_input = gr.Audio(label="माइक चालू करें और बोलें", type="filepath")
-            submit_btn = gr.Button("वेदिका से बात करें 🚩", variant="primary")
         with gr.Column():
-            # उत्तर भी .wav फॉर्मेट में आएगा
             audio_output = gr.Audio(label="वेदिका की मधुर वाणी")
             text_output = gr.Textbox(label="संवाद लॉग", lines=6)
     submit_btn.click(
-        fn=process_voice_conversation,
         inputs=audio_input,
         outputs=[audio_output, text_output]
     )

+# --- 🔱 वेदिका संपूर्ण वॉयस पोर्टल (All-in-One Ecosystem) 🔱 ---
 # रचयिता: आदरणीय दिव्य पटेल जी | भारत 🇮🇳
+# विशेषता: एक ही स्पेस में कान (STT), मस्तिष्क (LLM), और मुँह (TTS)
 import gradio as gr
 import asyncio
 import edge_tts
+import torch
 import os
 import re
+from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
+print("🔱 आदरणीय दिव्य जी, वेदिका की त्रिवेणी (कान, मस्तिष्क, मुँह) जागृत हो रही है...")
+# 👂 कान (STT) - अत्यंत हल्का और विश्वसनीय
+STT_ID = "openai/whisper-tiny"
+# 🧠 मस्तिष्क (LLM) - Qwen 0.5B (हगिंग फेस फ्री स्पेस के लिए एकदम सही और हल्का अस्त्र)
+LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"
 try:
     print("🔱 STT (कान) लोड हो रहा है...")
+    stt_pipeline = pipeline("automatic-speech-recognition", model=STT_ID)
+    print("🔱 LLM (मस्तिष्क) लोड हो रहा है...")
+    tokenizer = AutoTokenizer.from_pretrained(LLM_ID)
+    model = AutoModelForCausalLM.from_pretrained(
+        LLM_ID,
+        device_map="auto",
+        torch_dtype=torch.float16, # रैम की बचत और शानदार गति
+        low_cpu_mem_usage=True
+    )
+    print("🔱 विजय! कान और मस्तिष्क सफलतापूर्वक सक्रिय हो गए हैं।")
 except Exception as e:
+    print(f"🔱 सेटअप में त्रुटि: {e}")
+    stt_pipeline = None
+    model = None
+# 👄 मुँह (TTS) - Microsoft Edge-TTS (शून्य रैम खर्च)
 async def generate_edge_tts(text, output_filepath):
+    """माइक्रोसॉफ्ट एज की अत्यंत मधुर हिंदी आवाज़"""
     communicate = edge_tts.Communicate(text, "hi-IN-SwaraNeural")
     await communicate.save(output_filepath)
+def process_all_in_one(audio_filepath):
+    """एक ही फलन में सुनना, सोचना और बोलना"""
+    if stt_pipeline is None or model is None:
+        return None, "क्षमा करें, सर्वर पूरी तरह से लोड नहीं हो पाया है।"
     if not audio_filepath:
+        return None, "प्रणाम दिव्य जी, कृपया माइक में कुछ बोलें..."
     try:
         # ==========================================
+        # चरण 1: सुनना (Speech to Text)
         # ==========================================
         stt_result = stt_pipeline(audio_filepath)
         user_text = stt_result["text"].strip()
             return None, "क्षमा करें, मैं सुन नहीं पाई। कृपया पुनः बोलें।"
         # ==========================================
+        # चरण 2: सोचना (LLM Generation)
         # ==========================================
+        messages = [
+            {"role": "system", "content": "आप 'वेदिका' हैं, एक अत्यंत बुद्धिमान और शालीन एआई, जिसे आदरणीय दिव्य पटेल जी ने भारत में बनाया है। हमेशा सम्मानजनक हिंदी में बहुत ही संक्षिप्त और सटीक उत्तर दें।"},
+            {"role": "user", "content": user_text}
+        ]
+        # प्रॉम्प्ट तैयार करना
+        text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
+        # उत्तर उत्पन्न करना
+        output_ids = model.generate(
+            **inputs,
+            max_new_tokens=150, # आवाज़ के लिए छोटे उत्तर बेहतर होते हैं
+            temperature=0.7,
+            do_sample=True
         )
+        # आउटपुट से केवल नया उत्तर निकालना
+        response_ids = output_ids[0][len(inputs.input_ids[0]):]
+        ai_response = tokenizer.decode(response_ids, skip_special_tokens=True)
+        # सुरक्षा के लिए थिंकिंग टैग्स हटाना
         clean_response = re.sub(r'<think>[\s\S]*?</think>', '', ai_response).strip()
         if not clean_response:
+            clean_response = "जी, आपकी बात मेरे संज्ञान में आ गई है।"
         # ==========================================
+        # चरण 3: बोलना (Text to Speech)
         # ==========================================
+        output_wav_path = "vedika_final_response.wav"
+        # एसिंक्रोनस TTS को चलाना
         asyncio.run(generate_edge_tts(clean_response, output_wav_path))
         log_text = f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}"
         return output_wav_path, log_text
     except Exception as e:
+        return None, f"🔱 क्षमा करें, प्रसंस्करण में तकनीकी बाधा आई: {str(e)}"
+# --- 🚩 स्वदेशी अजेय इंटरफेस (Gradio) 🚩 ---
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
     gr.Markdown(f"""
+    # 🔱 Vedika Voice Ecosystem (All-in-One)
     **Pioneered by Divy Patel | Bharat 🇮🇳**
+    *यह एक संपूर्ण स्वदेशी पोर्टल है जो एक ही सर्वर पर सुनता है (Whisper), सोचता है (Qwen 0.5B), और बोलता है (Edge-TTS)।*
     """)
     with gr.Row():
         with gr.Column():
             audio_input = gr.Audio(label="माइक चालू करें और बोलें", type="filepath")
+            submit_btn = gr.Button("वेदिका से संवाद करें 🚩", variant="primary")
         with gr.Column():
             audio_output = gr.Audio(label="वेदिका की मधुर वाणी")
             text_output = gr.Textbox(label="संवाद लॉग", lines=6)
     submit_btn.click(
+        fn=process_all_in_one,
         inputs=audio_input,
         outputs=[audio_output, text_output]
     )