Spaces:

Vedika35
/

TTS

Sleeping

App Files Files Community

Vedika committed 16 days ago

Commit

9896caf

verified ·

1 Parent(s): a8d6c17

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -92

app.py CHANGED Viewed

@@ -1,118 +1,121 @@
-# --- 🔱 वेदिका 3.5 फ्लैश: भारत का अपना 2B AI (Super Fast Version) ---
-# रचयिता एवं मार्गदर्शक: दिव्य पटेल जी | भारत 🇮🇳
 import gradio as gr
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
-from threading import Thread
-from PIL import Image
 import os
-import cv2
-print("🔱 भारत का अजेय AI 'वेदिका 3.5 फ्लैश' सुपर-फास्ट मोड में जागृत हो रहा है...")
-# CPU Optimization
-os.environ["OMP_NUM_THREADS"] = "2"
-torch.set_num_threads(2)
-MODEL_ID = "pateltraders55455/Vedika-3.5-flash"
 try:
-    print(f"🔱 '{MODEL_ID}' (2B) लोड किया जा रहा है...")
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_ID,
-        device_map="cpu",
-        torch_dtype=torch.bfloat16,
-        low_cpu_mem_usage=True,
-        trust_remote_code=True
-    )
-    print("🔱 विजय! 'वेदिका 3.5 फ्लैश' तैयार है!")
-except Exception as e:
-    print(f"🔱 मॉडल लोडिंग में त्रुटि: {e}")
-    model, tokenizer = None, None
-def generate_vedika_magic(message, history, image=None, video=None):
-    if model is None or tokenizer is None:
-        return history + [{"role": "assistant", "content": "🔱 सिस्टम त्रुटि: मोडल लोड नहीं हो सका।"}]
-    recent_history = history[-2:] if len(history) > 2 else history
-    system_instruction = """You are 'Vedika 3.5 Flash'...
-    <think>
-    1. Analyze the user's query carefully.
-    2. Break down the problem into smaller logical steps.
-    3. Consider different solutions or facts.
-    4. Formulate the best response.
-    </think>
-    [Your final, polished answer goes here, OUTSIDE the think tags.]
     """
-    messages = [{"role": "system", "content": system_instruction}]
-    for user_msg, ai_msg in recent_history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": ai_msg})
-    if image is not None:
-        messages.append({"role": "user", "content": "Describe this image."})
-    if video is not None:
-        messages.append({"role": "user", "content": "Describe this video."})
-    messages.append({"role": "user", "content": message})
     try:
-        text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)
-        streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
-        generate_kwargs = dict(
-            **inputs,
-            streamer=streamer,
-            max_new_tokens=512,
-            temperature=1,
-            top_p=0.9,
-            do_sample=True,
-            use_cache=True
         )
-        t = Thread(target=model.generate, kwargs=generate_kwargs)
-        t.start()
-# पिछले स्ट्रिमर लूप को जारी रखते हैं
-        response_text = ""
-        for new_token in streamer:
-            response_text += new_token
-        # सही फॉर्मेट में परिणाम को लौटाएँ ताकि Gradio को समस्या न हो
-        return history + [{"role": "assistant", "content": response_text}]
     except Exception as e:
-        return history + [{"role": "assistant", "content": f"🔱 प्रसंस्करण त्रुटि: {str(e)}"}]
-# =============================================================================
-# 🔱 वेदिका 3.5 फ्लैश का मल्टीमीडिया UI + Send बटन
-# ============================================================================
-with gr.Blocks() as demo:
-    gr.Markdown("## 🔱 Vedika 3.5 Flash (Super Fast)")
-    with gr.Row():
-        text_input = gr.Textbox(placeholder="वेदिका 3.5 फ्लैश से कुछ भी पूछें...")
-        image_input = gr.Image(type="filepath", label="Upload Photo")
-        video_input = gr.Video(label="Upload Video")
-    send_btn = gr.Button("Send")
-    chat_output = gr.Chatbot()
-    def chat_fn(message, history, image, video):
-        return generate_vedika_magic(message, history, image, video)
-    send_btn.click(chat_fn, [text_input, chat_output, image_input, video_input], chat_output)
 if __name__ == "__main__":
-    demo.launch()

+# --- 🔱 वेदिका लाइव: कान और मुँह (Audio Interface) 🔱 ---
+# रचयिता: आदरणीय दिव्य पटेल जी | भारत 🇮🇳
+# कार्य: .wav ऑडियो लेना, STT करना, LLM API को कॉल करना, और TTS से वापस .wav देना
 import gradio as gr
 import torch
+import torchaudio
+import librosa
+import soundfile as sf
+from transformers import pipeline
+from gradio_client import Client
 import os
+import re
+print("🔱 दिव्य जी, वेदिका के 'कान' और 'मुँह' स्थापित हो रहे हैं...")
+# 👂 कान (Speech to Text) - NVIDIA का फुर्तीला मॉडल
+STT_ID = "nvidia/stt_hi_conformer_transducer_large"
+# 👄 मुँह (Text to Speech) - हल्का हिंदी मॉडल
+TTS_ID = "facebook/mms-tts-hin"
+# मस्तिष्क (LLM) का API पता (आपका अपना स्पेस)
+LLM_API_URL = "pateltraders55455/VEDIKA-3.5-LIVE"
 try:
+    # केवल हल्के ऑडियो मॉडल्स को इस स्पेस में लोड किया जा रहा है (रैम की भारी बचत)
+    print("🔱 STT (कान) लोड हो रहा है...")
+    stt_pipeline = pipeline("automatic-speech-recognition", model=STT_ID)
+    print("🔱 TTS (मुँह) लोड हो रहा है...")
+    tts_pipeline = pipeline("text-to-speech", model=TTS_ID)
+    # LLM क्लाइंट को स्थापित करना
+    llm_client = Client(LLM_API_URL)
+    print("🔱 विजय! कान, मुँह और मस्तिष्क का API संपर्क स्थापित हो गया है।")
+except Exception as e:
+    print(f"🔱 सेटअप में त्रुटि: {e}")
+def process_wav_to_wav(audio_filepath):
+    """
+    यह फलन .wav ऑडियो लेता है और .wav ऑडियो ही वापस करता है।
     """
+    if not audio_filepath:
+        return None, "प्रणाम दिव्य जी, कृपया माइक में कुछ बोलें..."
     try:
+        # ==========================================
+        # चरण 1: .wav ऑडियो सुनना (Speech to Text)
+        # ==========================================
+        stt_result = stt_pipeline(audio_filepath)
+        user_text = stt_result["text"]
+        if not user_text.strip():
+            return None, "क्षमा करें, मैं सुन नहीं पाई। कृपया पुनः प्रयास करें।"
+        # ==========================================
+        # चरण 2: मस्तिष्क (LLM Space) से संपर्क करना
+        # ==========================================
+        # gradio_client के माध्यम से आपके दूसरे स्पेस को टेक्स्ट भेजा जा रहा है
+        # नोट: ChatInterface में आमतौर पर api_name="/chat" या पहला फंक्शन होता है
+        llm_result = llm_client.predict(
+            user_text, # यूज़र का संदेश
+            api_name="/chat" # यदि यह काम न करे, तो इसे हटाकर fn_index=0 कर सकते हैं
         )
+        # LLM का उत्तर (यह मॉडल के आउटपुट फॉर्मेट पर निर्भर करता है)
+        ai_response = llm_result if isinstance(llm_result, str) else str(llm_result)
+        # थिंकिंग टैग्स (<think>...</think>) को साफ करना ताकि वेदिका केवल अंतिम उत्तर बोले
+        clean_response = re.sub(r'<think>[\s\S]*?</think>', '', ai_response).strip()
+        # ==========================================
+        # चरण 3: वापस बोलना (Text to .wav Speech)
+        # ==========================================
+        tts_output = tts_pipeline(clean_response)
+        # ऑडियो डेटा और सैंपलिंग रेट निकालना
+        audio_data = tts_output["audio"][0]
+        sample_rate = tts_output["sampling_rate"]
+        # इसे एक .wav फाइल के रूप में सहेजना ताकि UI में सही से बजे
+        output_wav_path = "vedika_response.wav"
+        sf.write(output_wav_path, audio_data, sample_rate)
+        log_text = f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}"
+        return output_wav_path, log_text
     except Exception as e:
+        return None, f"🔱 क्षमा करें, प्रसंस्करण में त्रुटि आई: {str(e)}"
+# --- 🚩 अजेय स्वदेशी इंटरफेस (Gradio) 🚩 ---
+with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
+    gr.Markdown(f"""
+    # 🔱 Vedika Voice Portal (Microservices Architecture)
+    **Pioneered by Divy Patel | Bharat 🇮🇳**
+    *यह पोर्टल केवल आवाज़ सुनता और बोलता है। विचार करने का कार्य सुरक्षित रूप से 'VEDIKA-3.5-LIVE' स्पेस में हो रहा है।*
+    """)
+    with gr.Row():
+        with gr.Column():
+            # type="filepath" सुनिश्चित करता है कि इनपुट .wav फॉर्मेट में ही सेव हो
+            audio_input = gr.Audio(label="माइक चालू करें और बोलें", type="filepath", format="wav")
+            submit_btn = gr.Button("वेदिका को भेजें 🚩", variant="primary")
+        with gr.Column():
+            # आउटपुट भी .wav फॉर्मेट में आएगा
+            audio_output = gr.Audio(label="वेदिका की वाणी", type="filepath", format="wav")
+            text_output = gr.Textbox(label="संवाद लॉग", lines=6)
+    submit_btn.click(
+        fn=process_wav_to_wav,
+        inputs=audio_input,
+        outputs=[audio_output, text_output]
+    )
 if __name__ == "__main__":
+    demo.launch()