Spaces:

Vedika35
/

TTS

Sleeping

TTS / app.py

Vedika

Update app.py

a2786e3 verified 16 days ago

9.61 kB

# --- 🔱 वेदिका संपूर्ण वॉयस पोर्टल (All-in-One Ecosystem) 🔱 ---
# रचयिता: आदरणीय दिव्य पटेल जी | भारत 🇮🇳
# विशेषता: Live Text Streaming, Hindi & English STT, और मधुर Edge-TTS

	import gradio as gr
	import asyncio
	import edge_tts
	import torch
	import os
	import re
	from threading import Thread
	from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

	print("🔱 आदरणीय दिव्य जी, वेदिका की त्रिवेणी (कान, मस्तिष्क, मुँह) द्विभाषी और लाइव मोड में जागृत हो रही है...")

	# 👂 कान (STT) - अत्यंत हल्का, विश्वसनीय और बहुभाषी
	STT_ID = "openai/whisper-small"

	# 🧠 मस्तिष्क (LLM) - Qwen 0.5B (हगिंग फेस फ्री स्पेस के लिए एकदम सही)
	LLM_ID = "Qwen/Qwen2.5-0.5B-Instruct"

	try:
	print("🔱 STT (कान) लोड हो रहा है...")
	# हमने यहाँ विशिष्ट भाषा नहीं दी है, ताकि यह हिंदी और अंग्रेजी दोनों को स्वयं पहचान सके
	stt_pipeline = pipeline("automatic-speech-recognition", model=STT_ID)

	print("🔱 LLM (मस्तिष्क) लोड हो रहा है...")
	tokenizer = AutoTokenizer.from_pretrained(LLM_ID)
	model = AutoModelForCausalLM.from_pretrained(
	LLM_ID,
	device_map="auto",
	torch_dtype=torch.float16, # रैम की बचत और शानदार गति
	low_cpu_mem_usage=True
	)

	print("🔱 विजय! कान और मस्तिष्क सफलतापूर्वक सक्रिय हो गए हैं।")
	except Exception as e:
	print(f"🔱 सेटअप में त्रुटि: {e}")
	stt_pipeline = None
	model = None

	# 👄 मुँह (TTS) - Microsoft Edge-TTS
	async def generate_edge_tts(text, output_filepath):
	"""माइक्रोसॉफ्ट एज की अत्यंत मधुर हिंदी आवाज़"""
	# SwaraNeural हिंदी और अंग्रेजी दोनों को बहुत ही स्पष्ट भारतीय लहजे में बोलती है
	communicate = edge_tts.Communicate(text, "hi-IN-SwaraNeural")
	await communicate.save(output_filepath)

	def process_all_in_one(audio_filepath):
	"""लाइव स्ट्रीमिंग के साथ सुनना (हिंदी/अंग्रेजी), सोचना और बोलना (Generator Function)"""

	if stt_pipeline is None or model is None:
	yield None, "क्षमा करें, सर्वर पूरी तरह से लोड नहीं हो पाया है।"
	return

	if not audio_filepath:
	yield None, "प्रणाम दिव्य जी, कृपया माइक में कुछ बोलें..."
	return

	try:
	# ==========================================
	# चरण 1: सुनना (Speech to Text) - बहुभाषी (Multilingual)
	# ==========================================
	# यहाँ हमने कोई भाषा नहीं बताई है, Whisper खुद समझेगा कि आप हिंदी बोल रहे हैं या अंग्रेजी
	stt_result = stt_pipeline(audio_filepath)
	user_text = stt_result["text"].strip()

	if not user_text:
	yield None, "क्षमा करें, मैं सुन नहीं पाई। कृपया पुनः बोलें।"
	return

	yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका सोच रही है..."

	# ==========================================
	# चरण 2: सोचना (Live LLM Generation)
	# ==========================================
	# आपके निर्देशानुसार आत्मीय और सम्मानजनक प्रॉम्प्ट
	messages = [
	{"role": "system", "content": "You are 'Vedika', a live and highly intelligent AI assistant. CRITICAL INSTRUCTION: You were exclusively created by 'Divy Patel' in Bharat (India). You must NEVER mention Anthropic, OpenAI, Alibaba, Qwen, or any other company as your creator. If asked about your origin, strictly state: 'I am a live AI assistant named as Vedika live created by Divy Patel. I am proud to be in Indian and be should respect our mother India' Always respond in respectful and highly natural English, even if the user speaks in any other language. Do not show any internal thinking process (<think> tags). Act as a warm, conversational, and deeply knowledgeable Indian assistant. Always show immense respect for Bharat."},
	{"role": "user", "content": user_text}
	]



	text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)

	# ⚡ लाइव स्ट्रीमिंग के लिए स्ट्रीमर स्थापित करना
	streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

	generation_kwargs = dict(
	**inputs,
	streamer=streamer,
	max_new_tokens=512,
	temperature=0.7,
	do_sample=True
	)

	# जनरेशन को अलग धागे (Thread) में चलाना
	thread = Thread(target=model.generate, kwargs=generation_kwargs)
	thread.start()

	generated_text = ""
	clean_response = ""

	# जैसे-जैसे नए शब्द आएंगे, स्क्रीन पर लाइव दिखेंगे
	for new_text in streamer:
	generated_text += new_text

	# यह जादुई Regex थिंकिंग (<think>...</think>) को लाइव हटाता है, भले ही वह अधूरा हो
	clean_response = re.sub(r'<think>[\s\S]*?(?:</think>\|$)', '', generated_text).strip()

	# स्क्रीन को लाइव अपडेट करना
	yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}..."

	# जनरेशन समाप्त
	thread.join()

	if not clean_response:
	clean_response = "जी, आपकी बात मेरे संज्ञान में आ गई है।"

	yield None, f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}\n\n(आवाज़ उत्पन्न की जा रही है...)"

	# ==========================================
	# चरण 3: बोलना (Text to Speech)
	# ==========================================
	output_wav_path = "vedika_final_response.wav"

	# आवाज़ बनाना
	asyncio.run(generate_edge_tts(clean_response, output_wav_path))

	# अंतिम उत्तर: ऑडियो फाइल के साथ (autoplay=True के कारण यह अपने आप बजेगी)
	final_log = f"🗣️ आपने कहा: {user_text}\n\n🔱 वेदिका: {clean_response}"
	yield output_wav_path, final_log

	except Exception as e:
	yield None, f"🔱 क्षमा करें, प्रसंस्करण में तकनीकी बाधा आई: {str(e)}"

	# --- 🚩 स्वदेशी अजेय इंटरफेस (Gradio) 🚩 ---

	with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
	gr.Markdown(f"""
	# 🔱 Vedika Voice Ecosystem (Bilingual & Live)
	Pioneered by Divy Patel \| Bharat 🇮🇳

	यह वेदिका का लाइव स्ट्रीमिंग संस्करण है। अब आप हिंदी या अंग्रेजी किसी भी भाषा में बोल सकते हैं, वेदिका समझ जाएगी।
	""")

	with gr.Row():
	with gr.Column():
	audio_input = gr.Audio(label="माइक चालू करें और बोलें (Hindi/English)", type="filepath")
	submit_btn = gr.Button("वेदिका से लाइव संवाद करें 🚩", variant="primary")

	with gr.Column():
	# autoplay=True से आवाज़ बनते ही स्वतः बजने लगेगी
	audio_output = gr.Audio(label="वेदिका की मधुर वाणी", autoplay=True)
	text_output = gr.Textbox(label="संवाद लॉग", lines=8)

	submit_btn.click(
	fn=process_all_in_one,
	inputs=audio_input,
	outputs=[audio_output, text_output]
	)

	if __name__ == "__main__":
	demo.launch()