import gradio as gr
import requests
import json
import os

# Configuration
INVOKE_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# SECURITY: never hard-code API keys in source control. Prefer the
# NVIDIA_API_KEY environment variable; the inline fallback is kept only for
# backward compatibility and this leaked key should be rotated and removed.
API_KEY = os.environ.get(
    "NVIDIA_API_KEY",
    "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",
)


def predict(message, history):
    """Stream a chat completion from the NVIDIA endpoint.

    Yields the progressively accumulated assistant reply so Gradio's
    ChatInterface can render it incrementally.

    Args:
        message: The user's latest message text.
        history: Prior chat turns supplied by Gradio (currently unused —
            each request sends only the latest message, so the model has
            no conversational memory).

    Yields:
        str: The assistant reply accumulated so far.

    Raises:
        requests.HTTPError: If the endpoint returns a non-2xx status.
        requests.Timeout: If connecting or reading a chunk stalls.
    """
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Accept": "text/event-stream",
    }
    payload = {
        "model": "moonshotai/kimi-k2.6",
        "messages": [{"role": "user", "content": message}],
        "max_tokens": 16384,
        "temperature": 1.0,
        "stream": True,
        "chat_template_kwargs": {"thinking": True},  # 🔱 Inachi Thinking Mode
    }

    # (connect, read) timeout: a hung connection must not block the UI
    # forever; the read timeout applies per-chunk on a streaming response.
    response = requests.post(
        INVOKE_URL,
        headers=headers,
        json=payload,
        stream=True,
        timeout=(10, 120),
    )
    # Surface HTTP errors immediately instead of trying to parse an error
    # body as an event stream (which would yield nothing, silently).
    response.raise_for_status()

    partial_message = ""
    for raw_line in response.iter_lines():
        if not raw_line:
            continue
        # Decode the event-stream line
        line = raw_line.decode("utf-8")
        if not line.startswith("data: "):
            continue
        data_str = line[6:]
        if data_str == "[DONE]":
            break
        try:
            data_json = json.loads(data_str)
            # Some stream frames (e.g. usage-only or role-only deltas)
            # carry no content; OpenAI-style APIs may also send
            # content=None, which would break string concatenation.
            delta = data_json["choices"][0]["delta"]
            content = delta.get("content")
            if content:
                partial_message += content
                yield partial_message
        except (json.JSONDecodeError, KeyError, IndexError):
            # Skip malformed or unexpected frames only — a bare except
            # here would also swallow GeneratorExit/KeyboardInterrupt.
            continue


# 🔱 Inachi UI Styling
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", neutral_hue="slate")) as demo:
    gr.Markdown("# 🔱 INACHI V1.1 - Kimi-k2.6 Experimental Lab")
    gr.Markdown("Testing NVIDIA MoonshotAI with **Thinking Mode** enabled.")
    chat = gr.ChatInterface(
        fn=predict,
        title="Inachi Core Testing",
        description="Master Architect MINZO-PRIME, the system is ready for testing.",
    )

demo.launch()