import gradio as gr
import requests
import json
import os

# Configuration
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# SECURITY: never hard-code API keys in source control. Prefer the
# NVIDIA_API_KEY environment variable; the inline fallback is kept only for
# backward compatibility and this leaked key should be rotated and removed.
API_KEY = os.environ.get(
    "NVIDIA_API_KEY",
    "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",
)


def predict(message, history):
    """Stream a chat completion from the NVIDIA endpoint.

    Yields the progressively accumulated assistant reply so Gradio's
    ChatInterface can render it incrementally.

    Args:
        message: The user's latest message text.
        history: Prior chat turns supplied by Gradio (currently unused —
            each request sends only the latest message, so the model has
            no conversational memory).

    Yields:
        str: The assistant reply accumulated so far.

    Raises:
        requests.HTTPError: If the endpoint returns a non-2xx status.
        requests.Timeout: If connecting or reading a chunk stalls.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Accept": "text/event-stream",
    }
    payload = {
        "model": "moonshotai/kimi-k2.6",
        "messages": [{"role": "user", "content": message}],
        "max_tokens": 16384,
        "temperature": 1.0,
        "stream": True,
        "chat_template_kwargs": {"thinking": True},  # 🔱 Inachi Thinking Mode
    }

    # (connect, read) timeout: a hung connection must not block the UI
    # forever; the read timeout applies per-chunk on a streaming response.
    response = requests.post(
        INVOKE_URL,
        headers=headers,
        json=payload,
        stream=True,
        timeout=(10, 120),
    )
    # Surface HTTP errors immediately instead of trying to parse an error
    # body as an event stream (which would yield nothing, silently).
    response.raise_for_status()

    partial_message = ""
    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        # Decode the event-stream line
        line = raw_line.decode("utf-8")
        if not line.startswith("data: "):
            continue
        data_str = line[6:]
        if data_str == "[DONE]":
            break
        try:
            data_json = json.loads(data_str)
            # Some stream frames (e.g. usage-only or role-only deltas)
            # carry no content; OpenAI-style APIs may also send
            # content=None, which would break string concatenation.
            delta = data_json["choices"][0]["delta"]
            content = delta.get("content")
            if content:
                partial_message += content
                yield partial_message
        except (json.JSONDecodeError, KeyError, IndexError):
            # Skip malformed or unexpected frames only — a bare except
            # here would also swallow GeneratorExit/KeyboardInterrupt.
            continue


# 🔱 Inachi UI Styling
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate")) as demo:
    gr.Markdown("# 🔱 INACHI V1.1 - Kimi-k2.6 Experimental Lab")
    gr.Markdown("Testing NVIDIA MoonshotAI with **Thinking Mode** enabled.")
    chat = gr.ChatInterface(
        fn=predict,
        title="Inachi Core Testing",
        description="Master Architect MINZO-PRIME, the system is ready for testing.",
    )

demo.launch()