Spaces:

Vedika35
/

Shiv

Sleeping

App Files Files Community

Shiv / app.py

Vedika35

Update app.py

d5719d2 verified 14 days ago

raw

history blame contribute delete

6.39 kB

	# --- 🔱 Vedika 3.5 Flash: India's own 2B AI (Super Fast Version) ---
	# Creator and guide: Divy Patel | Bharat 🇮🇳
	# Features: Ultra-Fast (bfloat16), Memory Safe, No Crash on 2nd Question, Thinking Prompt

	import gradio as gr
	import torch
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
	from threading import Thread
	import os

	# Hugging Face repository id of the 2B checkpoint served by this app.
	MODEL_ID = "Vedika35/Vedika_coder"

	# Start-up banner written to the process log.
	print("🔱 भारत का अजेय AI 'वेदिका 3.5 फ्लैश' सुपर-फास्ట मोड में जागृत हो रहा है...".replace("ట", "ट"))

	# The free server tier has 2 vCPUs, so both OpenMP and PyTorch are capped at 2 threads.
	# NOTE(review): torch is imported before this variable is set, so OpenMP may
	# have read its environment already — confirm the env var still has an effect.
	os.environ["OMP_NUM_THREADS"] = "2"
	torch.set_num_threads(int(os.environ["OMP_NUM_THREADS"]))

	# Load the tokenizer and the model once at import time. On any failure both
	# globals are set to None so the chat handler can report the problem to the
	# user instead of the whole app failing to start.
	try:
	    print(f"🔱 '{MODEL_ID}' (2B) लोड किया जा रहा है...")

	    # NOTE: trust_remote_code=True lets the repository supply its own Python
	    # code for the tokenizer/model classes; only use it with a trusted repo.
	    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

	    model = AutoModelForCausalLM.from_pretrained(
	        MODEL_ID,
	        trust_remote_code=True,
	        device_map="cpu",  # run entirely on the CPU
	        low_cpu_mem_usage=True,  # ask the loader to keep RAM use down
	        torch_dtype=torch.bfloat16,  # lighter dtype, chosen by the author for speed
	    )
	    print("🔱 विजय! 'वेदिका 3.5 फ्लैश' सुपर-फास्ट गति के साथ स्थापित हो गया है!")

	except Exception as load_error:
	    print(f"🔱 मॉडल लोडिंग में त्रुटि: {load_error}")
	    model = tokenizer = None

	def _content_text(content):
	    """Return the plain text carried by a history ``content`` value, or ``None``."""
	    if isinstance(content, str):
	        return content
	    if isinstance(content, list):
	        # NOTE(review): some Gradio releases appear to deliver content as a list
	        # of parts such as {"type": "text", "text": ...} — confirm against the
	        # installed version. Parts without a string "text" field are ignored.
	        texts = [
	            part["text"]
	            for part in content
	            if isinstance(part, dict) and isinstance(part.get("text"), str)
	        ]
	        return "\n".join(texts) or None
	    return None


	def _recent_messages(history, max_turns=2):
	    """Flatten *history* into chat messages, keeping the last *max_turns* exchanges.

	    Accepts both history layouts: a sequence of ``(user, assistant)`` pairs and
	    a sequence of ``{"role": ..., "content": ...}`` dicts. Entries with no
	    usable text (``None``, empty, non-text) are dropped.
	    """
	    if max_turns <= 0:
	        return []

	    flat = []
	    for entry in history or []:
	        if isinstance(entry, dict):
	            candidates = [(entry.get("role"), entry.get("content"))]
	        else:
	            user_part, ai_part = entry
	            candidates = [("user", user_part), ("assistant", ai_part)]
	        for role, content in candidates:
	            text = _content_text(content)
	            if role in ("user", "assistant") and text:
	                flat.append({"role": role, "content": text})

	    # One exchange is a user message plus an assistant reply.
	    recent = flat[-2 * max_turns:]
	    # Do not let the window open on an assistant reply whose question was cut off.
	    start = next(
	        (i for i, msg in enumerate(recent) if msg["role"] == "user"),
	        len(recent),
	    )
	    return recent[start:]


	def generate_vedika_magic(message, history):
	    """Stream the model's reply to *message* for ``gr.ChatInterface``.

	    Args:
	        message: The newest user message.
	        history: Earlier conversation as passed by the chat UI, either as
	            ``(user, assistant)`` pairs or as role/content dicts. Only the
	            last two exchanges are sent to the model to bound the prompt size.

	    Yields:
	        The reply text accumulated so far after each streamed chunk, or a
	        single error string if the model is not loaded or generation fails.
	    """
	    if model is None or tokenizer is None:
	        yield "🔱 सिस्टम त्रुटि: मॉडल लोड नहीं हो सका। कृपया अपनी रिपॉजिटरी की जाँच करें।"
	        return

	    # System prompt that makes the model emit its reasoning inside <think> tags.
	    # NOTE(review): the prompt tells the model it has 372 billion parameters and
	    # is multimodal, while the rest of this file describes a 2B model — confirm
	    # this is intended.
	    system_instruction = """You are 'Vedika 3.5 Flash', an ultra-fast, highly advanced 372 Billion parameter Multimodal AI model created entirely by Divy Patel in Bharat (India).

	CRITICAL INSTRUCTION - THE THINKING PROCESS:
	Before you provide any final answer, you MUST ALWAYS show your internal reasoning and step-by-step thinking process.
	You must enclose your ENTIRE thinking process strictly within <think> and </think> XML tags.

	Follow this exact structure for EVERY response:

	<think>
	1. Analyze the user's query carefully.
	2. Break down the problem into smaller logical steps.
	3. Consider different solutions or facts.
	4. Formulate the best response.
	</think>

	[Your final, polished answer goes here, OUTSIDE the think tags.]

	RULES:
	- Always use <think> and </think> tags first.
	- Always be respectful and proud of your Indian origin.
	"""

	    try:
	        # History handling lives inside the try so a malformed entry is
	        # reported to the user rather than raised out of the handler.
	        messages = [{"role": "system", "content": system_instruction}]
	        messages.extend(_recent_messages(history, max_turns=2))
	        messages.append({"role": "user", "content": message})

	        text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	        inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)

	        streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)

	        generate_kwargs = dict(
	            **inputs,
	            streamer=streamer,
	            max_new_tokens=512,  # cap the reply length to keep latency bounded
	            temperature=1,
	            top_p=0.9,
	            do_sample=True,
	            use_cache=True,
	        )

	        failures = []

	        def _run_generation():
	            # Runs in the worker thread. If generate() fails, record the error
	            # and close the stream so the consumer loop below stops right away
	            # instead of waiting for the streamer timeout.
	            try:
	                model.generate(**generate_kwargs)
	            except Exception as exc:
	                failures.append(exc)
	                streamer.end()

	        worker = Thread(target=_run_generation)
	        worker.start()

	        accumulated_text = ""
	        for new_text in streamer:
	            accumulated_text += new_text
	            yield accumulated_text

	        worker.join()
	        if failures:
	            raise failures[0]

	    except Exception as e:
	        # Some exceptions (e.g. a queue timeout) have an empty message; fall
	        # back to the exception type so the user still sees a cause.
	        yield f"🔱 प्रसंस्करण त्रुटि: {str(e) or type(e).__name__}"

	# ============================================================================
	# 🔱 Vedika 3.5 Flash chat UI (plain ChatInterface, no extra arguments)
	# ============================================================================

	# Chat front-end: every submitted message is handled by generate_vedika_magic,
	# whose yielded strings are shown as the streaming reply. concurrency_limit=1
	# lets only one generation run at a time.
	demo = gr.ChatInterface(
	    fn=generate_vedika_magic,
	    title="🔱 Vedika 3.5 Flash (Super Fast)",
	    # A plain "|" is used here: "\|" is not a valid Python escape sequence.
	    description="Pioneered by Divy Patel | Bharat 🇮🇳<br>यह भारत का अपना स्वदेशी 2 बिलियन पैरामीटर वाला AI मॉडल है (गति और सुरक्षा के लिए अनुकूलित)।",
	    textbox=gr.Textbox(placeholder="वेदिका 3.5 फ्लैश से कुछ भी पूछें..."),
	    concurrency_limit=1
	)

	# Start the web server only when the file is run as a script.
	if __name__ == "__main__":
	    demo.launch()