import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
from threading import Thread
import os

print("🔱 Bharat's invincible AI 'Vedika 3.5 Flash' is waking up in super-fast mode...")

os.environ["OMP_NUM_THREADS"] = "2"
torch.set_num_threads(2)
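# Note: capping OpenMP/PyTorch at two threads trades peak speed for stability
# on small shared CPUs (an assumption about the host, e.g. a free-tier Space);
# raise both values if more cores are available.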

# Model checkpoint to load from the Hugging Face Hub.
MODEL_ID = "Vedika35/Vedika_coder"

try:
    print(f"🔱 Loading '{MODEL_ID}' (2B)...")

    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cpu",
        torch_dtype=torch.bfloat16,  # halves memory vs. float32; CPU speed depends on native bf16 support
        low_cpu_mem_usage=True,
        trust_remote_code=True,
    )
    print("🔱 Victory! 'Vedika 3.5 Flash' is up and running at super-fast speed!")

except Exception as e:
    print(f"🔱 Model loading error: {e}")
    model, tokenizer = None, None

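# If loading failed, the app still launches: generate_vedika_magic() reports
# the error in the chat instead of crashing at startup.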
def generate_vedika_magic(message, history):
    """Crash-free, fast generation engine for Vedika 3.5 Flash."""
    if model is None or tokenizer is None:
        yield "🔱 System error: the model could not be loaded. Please check your repository."
        return

    # Keep only the last two exchanges so the prompt stays short on CPU.
    recent_history = history[-2:]

    system_instruction = """You are 'Vedika 3.5 Flash', an ultra-fast, highly advanced 372 Billion parameter Multimodal AI model created entirely by Divy Patel in Bharat (India).

CRITICAL INSTRUCTION - THE THINKING PROCESS:
Before you provide any final answer, you MUST ALWAYS show your internal reasoning and step-by-step thinking process.
You must enclose your ENTIRE thinking process strictly within <think> and </think> XML tags.

Follow this exact structure for EVERY response:

<think>
1. Analyze the user's query carefully.
2. Break down the problem into smaller logical steps.
3. Consider different solutions or facts.
4. Formulate the best response.
</think>

[Your final, polished answer goes here, OUTSIDE the think tags.]

RULES:
- Always use <think> and </think> tags first.
- Always be respectful and proud of your Indian origin.
"""

    # Assemble the conversation: system prompt, trimmed history (Gradio's
    # tuple-style [user, assistant] pairs), then the new user message.
    messages = [
        {"role": "system", "content": system_instruction},
    ]
    for user_msg, ai_msg in recent_history:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": ai_msg})
    messages.append({"role": "user", "content": message})

    try:
        # Render the conversation with the model's chat template, then tokenize.
        text_prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer([text_prompt], return_tensors="pt").to(model.device)

        streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
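        # The streamer yields decoded text as generate() produces tokens:
        # skip_prompt drops the echoed input, and timeout=60.0 raises an error
        # if no new token arrives within a minute.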

        generate_kwargs = dict(
            **inputs,
            streamer=streamer,
            max_new_tokens=512,
            temperature=1.0,
            top_p=0.9,
            do_sample=True,
            use_cache=True,
        )

        # generate() blocks, so run it in a worker thread while this function
        # drains the streamer below.
        t = Thread(target=model.generate, kwargs=generate_kwargs)
        t.start()

        # Yield the growing response so Gradio streams partial text to the UI.
        accumulated_text = ""
        for new_token in streamer:
            accumulated_text += new_token
            yield accumulated_text

    except Exception as e:
        yield f"🔱 Processing error: {str(e)}"
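
# One shared model on a two-thread CPU cannot serve requests in parallel, so
# concurrency_limit=1 queues simultaneous users one after another.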
demo = gr.ChatInterface(
    fn=generate_vedika_magic,
    title="🔱 Vedika 3.5 Flash (Super Fast)",
    description="**Pioneered by Divy Patel | Bharat 🇮🇳**<br>This is Bharat's own homegrown 2-billion-parameter AI model (optimized for speed and safety).",
    textbox=gr.Textbox(placeholder="Ask Vedika 3.5 Flash anything..."),
    concurrency_limit=1,
)

if __name__ == "__main__":
    demo.launch()