import gradio as gr
from huggingface_hub import InferenceClient
import os
import json
import time  # NOTE(review): unused here; kept in case other tooling relies on it
from pathlib import Path
from typing import List, Tuple

# ==================== CONFIGURATION ====================

HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError(
        "HF_TOKEN environment variable is required! "
        "Add it in Space Settings → Repository secrets."
    )

# Kimi-K2.6 served via the Novita inference provider on HF
# Note: provider="novita" is set in InferenceClient, so NO :novita suffix here
MODEL_NAME = "moonshotai/Kimi-K2.6"

HISTORY_FILE = "chat_history.json"
MAX_HISTORY_LENGTH = 20  # Keep last 20 messages

# Token limits for response lengths
RESPONSE_LENGTHS = {
    "Short": 512,
    "Medium": 1024,
    "Long": 4096,
}

MAX_FILE_SIZE_MB = 5


# ==================== INFERENCE CLIENT ====================

def create_client():
    """Create and validate the HF Inference Client with graceful fallback.

    Returns:
        An ``InferenceClient`` bound to the Novita provider, or ``None`` if
        initialization fails (the chat engine reports a friendly error then).
    """
    try:
        c = InferenceClient(
            provider="novita",
            api_key=HF_TOKEN,
        )
        print(f"✅ Connected to {MODEL_NAME} via Novita")
        return c
    except Exception as e:
        # Deliberate best-effort: a bad client is reported per-message later.
        print(f"⚠️ Client initialization warning: {e}")
        return None


client = create_client()


# ==================== MEMORY MANAGEMENT ====================

def load_history() -> List[dict]:
    """Load conversation history from JSON file.

    Returns a list of ``{"user": ..., "bot": ...}`` dicts (the persistent
    format written by :func:`save_history`), trimmed to the last
    ``MAX_HISTORY_LENGTH`` entries. Any read/parse error yields ``[]``.
    """
    if not os.path.exists(HISTORY_FILE):
        return []
    try:
        with open(HISTORY_FILE, "r", encoding="utf-8") as f:
            history = json.load(f)
        if len(history) > MAX_HISTORY_LENGTH:
            history = history[-MAX_HISTORY_LENGTH:]
            # Persist the trimmed list so the file never grows unbounded.
            save_history(history)
        return history
    except Exception as e:
        print(f"Error loading history: {e}")
        return []


def save_history(history: List[dict]):
    """Save conversation history to JSON file (last MAX_HISTORY_LENGTH only)."""
    try:
        if len(history) > MAX_HISTORY_LENGTH:
            history = history[-MAX_HISTORY_LENGTH:]
        with open(HISTORY_FILE, "w", encoding="utf-8") as f:
            json.dump(history, f, indent=2, ensure_ascii=False)
    except Exception as e:
        print(f"Error saving history: {e}")


def clear_memory() -> str:
    """Clear conversation history file and report success."""
    if os.path.exists(HISTORY_FILE):
        os.remove(HISTORY_FILE)
    return "✅ Memory cleared successfully!"


# ==================== FILE PROCESSING ====================

def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from PDF using PyPDF2.

    Returns extracted text, or a ❌-prefixed error string on failure.
    """
    try:
        import PyPDF2
        text = ""
        with open(file_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            for page in reader.pages:
                # FIX: extract_text() may return None for image-only pages;
                # concatenating None would raise TypeError.
                text += (page.extract_text() or "") + "\n"
        return text.strip()
    except ImportError:
        return "❌ PyPDF2 not installed. PDF reading is unavailable."
    except Exception as e:
        return f"❌ Error reading PDF: {str(e)}"


def process_uploaded_file(file) -> Tuple[str, str]:
    """Process an uploaded file and return (content_string, status_message).

    ``content_string`` is a markdown-formatted block ready to prepend to the
    user's message; ``status_message`` is shown in the UI. Both are empty when
    ``file`` is None; on error, content is empty and status carries the error.
    """
    if file is None:
        return "", ""
    try:
        file_path = file.name
        file_size_mb = os.path.getsize(file_path) / (1024 * 1024)
        if file_size_mb > MAX_FILE_SIZE_MB:
            return "", f"❌ File too large ({file_size_mb:.1f} MB). Max: {MAX_FILE_SIZE_MB} MB"

        file_ext = Path(file_path).suffix.lower()
        file_name = Path(file_path).name

        if file_ext == ".pdf":
            content = extract_text_from_pdf(file_path)
        elif file_ext in [".txt", ".py", ".js", ".ts", ".html", ".css", ".json",
                          ".md", ".java", ".cpp", ".c", ".rs", ".go"]:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
                content = f.read()
        else:
            return "", f"❌ Unsupported file type: {file_ext}"

        # Truncate very long files
        if len(content) > 50_000:
            content = content[:50_000] + "\n\n[... Content truncated due to length ...]"

        formatted = f"📎 **File: {file_name}**\n```\n{content}\n```\n\n"
        return formatted, f"✅ File loaded: {file_name} ({file_size_mb:.2f} MB)"
    except Exception as e:
        return "", f"❌ Error processing file: {str(e)}"


# ==================== CHAT ENGINE ====================

def build_messages(history: List[dict], full_message: str) -> List[dict]:
    """
    Convert Gradio history format into the OpenAI-compatible messages list
    that the Novita/Kimi API expects.
    """
    system_prompt = (
        "You are Kimi, a highly intelligent and helpful AI assistant created by Moonshot AI. "
        "You excel at reasoning, coding, mathematics, and analysis. "
        "Provide clear, structured, and accurate responses."
    )
    messages = [{"role": "system", "content": system_prompt}]

    # Add existing conversation history
    for turn in history:
        if turn.get("role") in ("user", "assistant") and turn.get("content"):
            content = turn["content"]
            # Strip out any <details> reasoning blocks from assistant history
            # (Kimi docs: do NOT include reasoning in multi-turn history).
            # The UI stores replies as "<details>…</details>\n\n---\n\n{answer}".
            if turn["role"] == "assistant" and "</details>" in content:
                content = content.split("</details>", 1)[1].lstrip()
                if content.startswith("---"):
                    content = content[3:].lstrip()
            messages.append({"role": turn["role"], "content": content})

    # Add current user message
    messages.append({
        "role": "user",
        "content": [{"type": "text", "text": full_message}]
    })
    return messages


def parse_kimi_response(completion) -> Tuple[str, str]:
    """
    Parse the API response. Returns (reasoning_text, final_answer_text).
    reasoning_text may be empty if the model didn't return it.
    """
    try:
        msg = completion.choices[0].message
        # Some OpenAI-compatible providers expose the chain-of-thought as
        # `reasoning_content` rather than `reasoning` — check both.
        reasoning = (
            getattr(msg, "reasoning", None)
            or getattr(msg, "reasoning_content", None)
            or ""
        )
        content = msg.content or ""
        return reasoning.strip(), content.strip()
    except Exception as e:
        return "", f"⚠️ Error parsing response: {str(e)}"


def chat_engine(
    message: str,
    history: List[dict],
    response_length: str,
    file_content: str = "",
):
    """
    Main chat function. Returns updated Gradio history.
    history is a list of {role, content} dicts (Gradio messages format).
    """
    if not message.strip():
        return history

    if client is None:
        err = (
            "⚠️ Model client not initialized.\n\n"
            "Please check that:\n"
            "1. `HF_TOKEN` is set in your Space Secrets.\n"
            "2. The token has **read** access and Inference API is enabled."
        )
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": err})
        return history

    try:
        full_message = (file_content + message) if file_content else message
        messages = build_messages(history, full_message)
        max_tokens = RESPONSE_LENGTHS.get(response_length, 1024)

        # ── API call ────────────────────────────────────────────────────────
        completion = client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            max_tokens=max_tokens,
            temperature=1.0,
            top_p=0.95,
            stream=False,
        )
        # ───────────────────────────────────────────────────────────────────

        reasoning, answer = parse_kimi_response(completion)

        # Format the bot reply: show reasoning in a collapsible block if present
        if reasoning:
            bot_reply = (
                "<details>\n"
                "<summary>🧠 Kimi's Reasoning (click to expand)</summary>\n\n"
                f"{reasoning}\n\n"
                "</details>\n\n"
                f"---\n\n{answer}"
            )
        else:
            bot_reply = answer

        # Save to persistent file (clean format, no reasoning blocks)
        persistent = load_history()
        persistent.append({"user": message, "bot": answer})
        save_history(persistent)

        # Update Gradio history
        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": bot_reply})
        return history

    except Exception as e:
        error_str = str(e)

        # ── Friendly error hints ────────────────────────────────────────────
        if "401" in error_str or "Unauthorized" in error_str:
            hint = (
                "🔑 **401 Unauthorized** — Your HF token is invalid or missing.\n\n"
                "Fix: Go to Space Settings → Secrets → add `HF_TOKEN` with a valid token."
            )
        elif "403" in error_str or "Forbidden" in error_str:
            hint = (
                "🚫 **403 Forbidden** — Your token doesn't have access to this model or provider.\n\n"
                "Fix: Make sure your HF token has `read` permission and you have accepted "
                "the model's license on Hugging Face."
            )
        elif "429" in error_str or "rate limit" in error_str.lower():
            hint = (
                "⏳ **429 Rate Limited** — Too many requests. Please wait 30 seconds and try again.\n\n"
                "This is a Novita provider limit, not a code error."
            )
        elif "503" in error_str or "loading" in error_str.lower():
            hint = (
                "⏳ **503 Model Loading** — The model is warming up on the server.\n\n"
                "Wait 30–60 seconds and resend your message."
            )
        elif "model" in error_str.lower() and "not found" in error_str.lower():
            hint = (
                "❓ **Model Not Found** — The model ID or provider tag may have changed.\n\n"
                f"Current model: `{MODEL_NAME}`\n"
                "Check the HF model page for the latest provider tag."
            )
        else:
            hint = f"⚠️ **Unexpected Error:**\n```\n{error_str}\n```\n\nPlease try again in a moment."

        history.append({"role": "user", "content": message})
        history.append({"role": "assistant", "content": hint})
        return history


# ==================== GRADIO INTERFACE ====================

def create_interface():
    """Build and return the Gradio Blocks interface."""
    css = """
    /* ── Google Font ── */
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');

    /* ── Root / Body ── */
    body, .gradio-container {
        font-family: 'Inter', sans-serif !important;
        background: #0d0f14 !important;
        color: #e2e8f0 !important;
    }

    /* ── Header block ── */
    .kimi-header {
        background: linear-gradient(135deg, #1a1f2e 0%, #0f1623 50%, #1a1035 100%);
        border: 1px solid rgba(139, 92, 246, 0.25);
        border-radius: 16px;
        padding: 28px 36px;
        margin-bottom: 20px;
        text-align: center;
        box-shadow: 0 4px 32px rgba(139, 92, 246, 0.12);
    }
    .kimi-header h1 {
        font-size: 2rem !important;
        font-weight: 700 !important;
        background: linear-gradient(90deg, #a78bfa, #7c3aed, #c4b5fd) !important;
        -webkit-background-clip: text !important;
        -webkit-text-fill-color: transparent !important;
        margin: 0 0 6px 0 !important;
    }
    .kimi-header p {
        color: #94a3b8 !important;
        font-size: 0.9rem !important;
        margin: 0 !important;
    }
    .kimi-header .badge {
        display: inline-block;
        background: rgba(124, 58, 237, 0.18);
        border: 1px solid rgba(139, 92, 246, 0.35);
        color: #c4b5fd;
        border-radius: 999px;
        padding: 3px 12px;
        font-size: 0.78rem;
        margin-top: 10px;
    }

    /* ── Chat bubble overrides ── */
    .message-wrap { padding: 6px 0 !important; }
    .user .message-bubble-border, .user .message {
        background: linear-gradient(135deg, #3b1d8a, #5b21b6) !important;
        border: none !important;
        color: #f5f3ff !important;
        border-radius: 18px 18px 4px 18px !important;
    }
    .bot .message-bubble-border, .bot .message {
        background: #1e2333 !important;
        border: 1px solid rgba(139, 92, 246, 0.2) !important;
        color: #e2e8f0 !important;
        border-radius: 18px 18px 18px 4px !important;
    }

    /* ── Input textbox ── */
    .input-wrap textarea {
        background: #141824 !important;
        border: 1px solid rgba(139, 92, 246, 0.3) !important;
        border-radius: 12px !important;
        color: #e2e8f0 !important;
        font-size: 0.95rem !important;
        padding: 12px 16px !important;
        resize: none !important;
        transition: border-color 0.2s;
    }
    .input-wrap textarea:focus {
        border-color: rgba(139, 92, 246, 0.7) !important;
        outline: none !important;
        box-shadow: 0 0 0 3px rgba(139, 92, 246, 0.12) !important;
    }

    /* ── Send button ── */
    #send-btn {
        background: linear-gradient(135deg, #7c3aed, #5b21b6) !important;
        border: none !important;
        border-radius: 12px !important;
        color: #fff !important;
        font-weight: 600 !important;
        font-size: 0.95rem !important;
        letter-spacing: 0.02em !important;
        transition: all 0.2s ease !important;
        box-shadow: 0 4px 14px rgba(124, 58, 237, 0.4) !important;
    }
    #send-btn:hover {
        background: linear-gradient(135deg, #6d28d9, #4c1d95) !important;
        box-shadow: 0 6px 20px rgba(124, 58, 237, 0.55) !important;
        transform: translateY(-1px) !important;
    }

    /* ── Settings accordion ── */
    .gr-accordion {
        background: #141824 !important;
        border: 1px solid rgba(139, 92, 246, 0.2) !important;
        border-radius: 12px !important;
        margin-top: 12px !important;
    }
    .gr-accordion .label-wrap {
        color: #a78bfa !important;
        font-weight: 600 !important;
    }

    /* ── Radio buttons ── */
    .gr-radio label {
        background: #1e2333 !important;
        border: 1px solid rgba(139, 92, 246, 0.2) !important;
        border-radius: 8px !important;
        color: #cbd5e1 !important;
        padding: 6px 14px !important;
        transition: all 0.15s;
    }
    .gr-radio label:hover {
        border-color: rgba(139, 92, 246, 0.5) !important;
        color: #e2e8f0 !important;
    }
    .gr-radio input:checked + label {
        background: rgba(124, 58, 237, 0.25) !important;
        border-color: #7c3aed !important;
        color: #c4b5fd !important;
    }

    /* ── Action buttons ── */
    #clear-file-btn {
        background: #1e2333 !important;
        border: 1px solid rgba(139, 92, 246, 0.25) !important;
        color: #94a3b8 !important;
        border-radius: 8px !important;
        font-size: 0.82rem !important;
        transition: all 0.15s;
    }
    #clear-file-btn:hover {
        border-color: rgba(139, 92, 246, 0.5) !important;
        color: #c4b5fd !important;
    }
    #clear-chat-btn {
        background: rgba(239, 68, 68, 0.1) !important;
        border: 1px solid rgba(239, 68, 68, 0.3) !important;
        color: #f87171 !important;
        border-radius: 8px !important;
        font-size: 0.82rem !important;
        transition: all 0.15s;
    }
    #clear-chat-btn:hover {
        background: rgba(239, 68, 68, 0.2) !important;
        border-color: rgba(239, 68, 68, 0.5) !important;
    }

    /* ── Status messages ── */
    .file-status p {
        color: #4ade80 !important;
        font-size: 0.82rem !important;
    }

    /* ── Footer tips ── */
    .tips-block {
        background: #141824;
        border: 1px solid rgba(139, 92, 246, 0.15);
        border-radius: 12px;
        padding: 14px 20px;
        margin-top: 12px;
    }
    .tips-block p, .tips-block li {
        color: #64748b !important;
        font-size: 0.82rem !important;
    }

    /* ── Chatbot container ── */
    .chatbot-wrap {
        border: 1px solid rgba(139, 92, 246, 0.2) !important;
        border-radius: 14px !important;
        background: #0f1219 !important;
        overflow: hidden !important;
    }
    """

    with gr.Blocks(
        title="Kimi K2.6 · AI Reasoning Chatbot",
        css=css,
        theme=gr.themes.Base(
            primary_hue="violet",
            neutral_hue="slate",
            font=gr.themes.GoogleFont("Inter"),
        ).set(
            body_background_fill="#0d0f14",
            body_text_color="#e2e8f0",
            block_background_fill="#141824",
            block_border_color="rgba(139,92,246,0.2)",
            input_background_fill="#141824",
            button_primary_background_fill="linear-gradient(135deg,#7c3aed,#5b21b6)",
            button_primary_text_color="#ffffff",
        ),
    ) as demo:

        # ── Header ─────────────────────────────────────────────────────────
        # NOTE(review): markup reconstructed to match the .kimi-header CSS
        # classes defined above — confirm against the original design.
        gr.HTML("""
        <div class="kimi-header">
            <h1>🌙 Kimi K2.6</h1>
            <p>Moonshot AI's 1-Trillion-Parameter Reasoning Model</p>
            <span class="badge">⚡ Powered by Novita Inference API · 256K Context Window</span>
        </div>
        """)

        # ── State ──────────────────────────────────────────────────────────
        file_content_state = gr.State("")

        # ── Chat window ────────────────────────────────────────────────────
        chatbot = gr.Chatbot(
            label="",
            height=520,
            show_label=False,
            type="messages",
            render_markdown=True,
            bubble_full_width=False,
            elem_classes=["chatbot-wrap"],
            avatar_images=(
                None,
                "https://huggingface.co/moonshotai/Kimi-K2.6/resolve/main/figures/kimi-logo.png",
            ),
        )

        # ── Input row ──────────────────────────────────────────────────────
        with gr.Row(equal_height=True):
            msg = gr.Textbox(
                label="",
                placeholder="✦ Ask Kimi anything — coding, math, reasoning, analysis...",
                lines=2,
                max_lines=6,
                scale=8,
                show_label=False,
                elem_classes=["input-wrap"],
                container=False,
            )
            send_btn = gr.Button(
                "Send 🚀",
                variant="primary",
                scale=1,
                min_width=100,
                elem_id="send-btn",
            )

        # ── Settings accordion ─────────────────────────────────────────────
        with gr.Accordion("⚙️ Settings & File Upload", open=False):
            with gr.Row():
                response_length = gr.Radio(
                    choices=["Short", "Medium", "Long"],
                    value="Medium",
                    label="📏 Response Length",
                    info="Short · 512 tok Medium · 1024 tok Long · 4096 tok",
                    scale=2,
                )
                gr.HTML("<div></div>")  # spacer column

            file_upload = gr.File(
                label="📎 Upload File for Analysis (PDF · Code · Text — max 5 MB)",
                file_types=[".txt", ".pdf", ".py", ".js", ".ts", ".html", ".css",
                            ".json", ".md", ".java", ".cpp", ".c", ".rs", ".go"],
                type="filepath",
            )
            file_status = gr.Markdown("", elem_classes=["file-status"])

            with gr.Row():
                clear_file_btn = gr.Button(
                    "🗑️ Clear File",
                    size="sm",
                    variant="secondary",
                    elem_id="clear-file-btn",
                )
                clear_btn = gr.Button(
                    "🧹 Clear Chat",
                    size="sm",
                    variant="stop",
                    elem_id="clear-chat-btn",
                )
            clear_status = gr.Markdown("")

        # ── Tips footer ────────────────────────────────────────────────────
        gr.HTML("""
        <div class="tips-block">
            <p>💡 <b>Tips</b></p>
        </div>
        """)

        # ── Event handler functions ────────────────────────────────────────
        def handle_file_upload(file):
            # Returns (file content for the next prompt, UI status line).
            content, status = process_uploaded_file(file)
            return content, status

        def clear_file():
            # Reset upload widget, stored content and status line.
            return None, "", ""

        def clear_conversation():
            # Wipe both the on-disk memory and the on-screen chat.
            result = clear_memory()
            return [], result

        def clear_input():
            # Empty the message textbox after a send.
            return ""

        # ── Wire up events ─────────────────────────────────────────────────
        file_upload.change(
            fn=handle_file_upload,
            inputs=[file_upload],
            outputs=[file_content_state, file_status],
        )

        clear_file_btn.click(
            fn=clear_file,
            outputs=[file_upload, file_content_state, file_status],
        )

        send_btn.click(
            fn=chat_engine,
            inputs=[msg, chatbot, response_length, file_content_state],
            outputs=[chatbot],
            api_name="chat",
        ).then(
            fn=clear_input,
            outputs=[msg],
        )

        msg.submit(
            fn=chat_engine,
            inputs=[msg, chatbot, response_length, file_content_state],
            outputs=[chatbot],
            # FIX: was api_name="chat" — a duplicate of the click endpoint;
            # hide this route from the API instead of colliding.
            api_name=False,
        ).then(
            fn=clear_input,
            outputs=[msg],
        )

        clear_btn.click(
            fn=clear_conversation,
            outputs=[chatbot, clear_status],
            api_name="clear_memory",
        )

    return demo


# ==================== LAUNCH ====================

if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        share=False,
    )