import json
import os

import gradio as gr
import requests
|
|
| |
# NVIDIA OpenAI-compatible chat-completions endpoint.
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"

# SECURITY: this key was previously committed in plaintext — it should be
# treated as compromised and rotated. Prefer the NVIDIA_API_KEY environment
# variable; the hard-coded value is kept only as a backward-compatible
# fallback so existing deployments keep working.
API_KEY = os.environ.get(
    "NVIDIA_API_KEY",
    "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",
)
|
|
def predict(message, history):
    """Stream a chat completion for *message*, yielding the growing reply.

    Generator suitable for ``gr.ChatInterface``: each yield is the full
    partial assistant message accumulated so far.

    Args:
        message: The latest user message (plain string).
        history: Prior turns supplied by Gradio. Handles both the
            "messages" format (list of {"role", "content"} dicts) and the
            legacy list of (user, assistant) pairs — TODO confirm which
            format this Gradio version emits.

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Accept": "text/event-stream",
    }

    # Bug fix: the original ignored `history`, so the model never saw any
    # conversation context. Replay prior turns before the new message.
    messages = []
    for item in history or []:
        if isinstance(item, dict):
            # Gradio "messages" format.
            messages.append({"role": item["role"], "content": item["content"]})
        else:
            # Legacy (user, assistant) tuple/list format.
            user_turn, assistant_turn = item
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    payload = {
        "model": "moonshotai/kimi-k2.6",
        "messages": messages,
        "max_tokens": 16384,
        "temperature": 1.0,
        "stream": True,
        "chat_template_kwargs": {"thinking": True},
    }

    # Timeout = (connect, read); without it a stalled connection hangs forever.
    response = requests.post(
        INVOKE_URL, headers=headers, json=payload, stream=True, timeout=(10, 300)
    )
    response.raise_for_status()  # surface auth/quota errors instead of an empty chat

    partial_message = ""
    try:
        # Server-Sent Events: each payload line looks like "data: {json}".
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8")
            if not line.startswith("data: "):
                continue
            data_str = line[len("data: "):]
            if data_str == "[DONE]":
                break
            try:
                chunk = json.loads(data_str)
                delta = chunk["choices"][0]["delta"]
            except (json.JSONDecodeError, KeyError, IndexError):
                # Skip malformed chunks only — the original bare `except:`
                # also swallowed KeyboardInterrupt/GeneratorExit.
                continue
            content = delta.get("content")
            if content:
                partial_message += content
                yield partial_message
    finally:
        # Ensure the streaming connection is released even if the
        # consumer abandons the generator mid-stream.
        response.close()
|
|
| |
# --- UI definition ----------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate")) as demo:
    gr.Markdown("# π± INACHI V1.1 - Kimi-k2.6 Experimental Lab")
    gr.Markdown("Testing NVIDIA MoonshotAI with **Thinking Mode** enabled.")

    chat = gr.ChatInterface(
        fn=predict,
        title="Inachi Core Testing",
        description="Master Architect MINZO-PRIME, the system is ready for testing."
    )

# Launch only when run as a script, so the module can be imported
# (e.g. by `gradio deploy` or tests) without starting a web server.
# `demo` remains defined at module level for importers.
if __name__ == "__main__":
    demo.launch()