import json
import os

import gradio as gr
import requests
# Configuration
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# SECURITY: never commit API keys to source control. The key previously
# hardcoded here must be considered leaked — rotate it. Read the key from
# the environment instead; empty string if unset so startup still succeeds.
API_KEY = os.environ.get("NVIDIA_API_KEY", "")
def predict(message, history):
    """Stream a chat completion from the NVIDIA API, yielding the growing reply.

    Parameters
    ----------
    message : str
        The user's latest message.
    history : list
        Prior turns supplied by ``gr.ChatInterface``. Currently unused — only
        the current message is sent, so the model has no conversation memory.

    Yields
    ------
    str
        The accumulated assistant reply, extended as each token arrives.

    Raises
    ------
    requests.HTTPError
        If the API responds with a non-2xx status.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Accept": "text/event-stream",
    }
    payload = {
        "model": "moonshotai/kimi-k2.6",
        "messages": [{"role": "user", "content": message}],
        "max_tokens": 16384,
        "temperature": 1.0,
        "stream": True,
        "chat_template_kwargs": {"thinking": True},  # 🔱 Inachi Thinking Mode
    }
    # timeout=(connect, read): fail fast on a dead endpoint but tolerate long
    # gaps between streamed tokens. Without a timeout a stalled connection
    # hangs this generator forever.
    response = requests.post(
        INVOKE_URL, headers=headers, json=payload, stream=True, timeout=(10, 300)
    )
    # Surface HTTP errors explicitly instead of trying to parse an error body
    # as an event stream.
    response.raise_for_status()

    partial_message = ""
    # decode_unicode=True makes iter_lines yield str, removing manual decode().
    for line in response.iter_lines(decode_unicode=True):
        if not line:
            continue  # SSE keep-alive blank lines
        if not line.startswith("data: "):
            continue  # ignore non-data frames (comments, event names)
        data_str = line[len("data: "):]
        if data_str == "[DONE]":  # SSE stream terminator
            break
        try:
            delta = json.loads(data_str)["choices"][0]["delta"]
        except (json.JSONDecodeError, KeyError, IndexError):
            # Malformed or non-content frame — skip it. The original bare
            # `except:` also swallowed KeyboardInterrupt and real bugs.
            continue
        content = delta.get("content")
        if content:
            partial_message += content
            yield partial_message
# 🔱 Inachi UI Styling
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate")) as demo:
    gr.Markdown("# 🔱 INACHI V1.1 - Kimi-k2.6 Experimental Lab")
    gr.Markdown("Testing NVIDIA MoonshotAI with **Thinking Mode** enabled.")
    chat = gr.ChatInterface(
        fn=predict,
        title="Inachi Core Testing",
        description="Master Architect MINZO-PRIME, the system is ready for testing."
    )

# Launch only when executed as a script, so the module can be imported
# (e.g. for testing) without starting a web server.
if __name__ == "__main__":
    demo.launch()