# Source: hi / app.py
# Author: MINZO4546
# Commit message: Create app.py
# Commit: 6a82998 (verified)
import json
import os

import gradio as gr
import requests
# Configuration
# Chat-completions endpoint that predict() posts to.
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"

# SECURITY: never commit the API key to source control. It is read from the
# NVIDIA_API_KEY environment variable (for example a deployment secret) and
# falls back to an empty string when unset, in which case requests will be
# rejected by the server as unauthenticated. Any key that was previously
# hard-coded in this file must be treated as compromised: revoke and rotate it.
API_KEY = os.environ.get("NVIDIA_API_KEY", "")
def predict(message, history):
    """Stream a chat completion for ``message`` and yield the growing reply.

    Posts a single-turn, streaming request to ``INVOKE_URL`` and parses the
    server-sent-event (SSE) lines as they arrive.

    Args:
        message: The user's latest chat message; sent as the only entry in
            the request's ``messages`` list.
        history: Prior conversation supplied by the caller. Accepted for
            interface compatibility; it is not forwarded to the model.

    Yields:
        The accumulated assistant text so far, once for every received chunk
        that carries non-empty ``content``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status (instead of silently producing no output).
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Accept": "text/event-stream",
    }
    payload = {
        "model": "moonshotai/kimi-k2.6",
        "messages": [{"role": "user", "content": message}],
        "max_tokens": 16384,
        "temperature": 1.0,
        "stream": True,
        "chat_template_kwargs": {"thinking": True},  # Inachi Thinking Mode
    }
    # (connect, read) timeouts in seconds. The read timeout bounds the gap
    # between streamed chunks, not the total duration of the reply.
    timeout = (10, 300)

    partial_message = ""
    # The context manager releases the connection even when the consumer
    # stops iterating this generator early.
    with requests.post(
        INVOKE_URL,
        headers=headers,
        json=payload,
        stream=True,
        timeout=timeout,
    ) as response:
        # Surface authentication / quota / server errors to the caller.
        response.raise_for_status()

        for raw_line in response.iter_lines():
            if not raw_line:
                continue  # blank lines separate SSE events

            # Decode the event-stream line.
            line = raw_line.decode("utf-8")
            if not line.startswith("data:"):
                continue  # not a data field (e.g. an SSE comment line)

            # SSE permits "data:" with or without a following space.
            data_str = line[len("data:"):].strip()
            if data_str == "[DONE]":
                break

            # Skip malformed or unexpectedly shaped chunks, but only those:
            # a bare ``except`` here would also swallow GeneratorExit and
            # KeyboardInterrupt.
            try:
                delta = json.loads(data_str)["choices"][0]["delta"]
            except (json.JSONDecodeError, KeyError, IndexError, TypeError):
                continue

            # A delta may omit ``content`` or carry a null/empty value;
            # those chunks add no visible text, so nothing is yielded.
            content = delta.get("content") if isinstance(delta, dict) else None
            if not content:
                continue

            partial_message += content
            yield partial_message
# Inachi UI styling: build the Gradio page and start serving it.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate")) as demo:
    # "\U0001F531" is the trident emoji; written as an escape so the heading
    # cannot be corrupted by a wrong file encoding.
    gr.Markdown("# \U0001F531 INACHI V1.1 - Kimi-k2.6 Experimental Lab")
    gr.Markdown("Testing NVIDIA MoonshotAI with **Thinking Mode** enabled.")
    # Chat widget backed by predict(), which streams the reply incrementally.
    chat = gr.ChatInterface(
        fn=predict,
        title="Inachi Core Testing",
        description="Master Architect MINZO-PRIME, the system is ready for testing.",
    )

# Start the web server for the interface defined above.
demo.launch()