import json
import os

import gradio as gr
import requests
|
|
| |
# NVIDIA OpenAI-compatible chat-completions endpoint.
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"

# SECURITY: this key was previously committed in plaintext — it should be
# treated as compromised and rotated. Prefer the NVIDIA_API_KEY environment
# variable; the hard-coded value is kept only as a backward-compatible
# fallback so existing deployments keep working.
API_KEY = os.environ.get(
    "NVIDIA_API_KEY",
    "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",
)
|
|
def predict(message, history):
    """Stream a chat completion for *message*, yielding the growing reply.

    Generator suitable for ``gr.ChatInterface``: each yield is the full
    partial assistant message accumulated so far.

    Args:
        message: The latest user message (plain string).
        history: Prior turns supplied by Gradio. Handles both the
            "messages" format (list of {"role", "content"} dicts) and the
            legacy list of (user, assistant) pairs — TODO confirm which
            format this Gradio version emits.

    Raises:
        requests.HTTPError: if the API responds with an error status.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Accept": "text/event-stream",
    }

    # Bug fix: the original ignored `history`, so the model never saw any
    # conversation context. Replay prior turns before the new message.
    messages = []
    for item in history or []:
        if isinstance(item, dict):
            # Gradio "messages" format.
            messages.append({"role": item["role"], "content": item["content"]})
        else:
            # Legacy (user, assistant) tuple/list format.
            user_turn, assistant_turn = item
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    payload = {
        "model": "moonshotai/kimi-k2.6",
        "messages": messages,
        "max_tokens": 16384,
        "temperature": 1.0,
        "stream": True,
        "chat_template_kwargs": {"thinking": True},
    }

    # Timeout = (connect, read); without it a stalled connection hangs forever.
    response = requests.post(
        INVOKE_URL, headers=headers, json=payload, stream=True, timeout=(10, 300)
    )
    response.raise_for_status()  # surface auth/quota errors instead of an empty chat

    partial_message = ""
    try:
        # Server-Sent Events: each payload line looks like "data: {json}".
        for raw_line in response.iter_lines():
            if not raw_line:
                continue
            line = raw_line.decode("utf-8")
            if not line.startswith("data: "):
                continue
            data_str = line[len("data: "):]
            if data_str == "[DONE]":
                break
            try:
                chunk = json.loads(data_str)
                delta = chunk["choices"][0]["delta"]
            except (json.JSONDecodeError, KeyError, IndexError):
                # Skip malformed chunks only — the original bare `except:`
                # also swallowed KeyboardInterrupt/GeneratorExit.
                continue
            content = delta.get("content")
            if content:
                partial_message += content
                yield partial_message
    finally:
        # Ensure the streaming connection is released even if the
        # consumer abandons the generator mid-stream.
        response.close()
|
|
| |
# --- UI definition ----------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate")) as demo:
    gr.Markdown("# π± INACHI V1.1 - Kimi-k2.6 Experimental Lab")
    gr.Markdown("Testing NVIDIA MoonshotAI with **Thinking Mode** enabled.")

    chat = gr.ChatInterface(
        fn=predict,
        title="Inachi Core Testing",
        description="Master Architect MINZO-PRIME, the system is ready for testing."
    )

# Launch only when run as a script, so the module can be imported
# (e.g. by `gradio deploy` or tests) without starting a web server.
# `demo` remains defined at module level for importers.
if __name__ == "__main__":
    demo.launch()