| import json |
| import struct |
| import gzip |
| import time |
| import uuid |
| import os |
| import re |
| import asyncio |
| import hashlib |
| import queue |
| from concurrent.futures import ThreadPoolExecutor |
| from typing import List, Optional, Dict, Any |
| from fastapi import FastAPI, Request, HTTPException, Depends |
| from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials |
| from fastapi.responses import StreamingResponse |
| from curl_cffi import requests |
| import uvicorn |
|
|
app = FastAPI()
security = HTTPBearer()


# Path to a browser-export cookies.json (a JSON list of {"name", "value", ...} dicts).
COOKIES_PATH = os.environ.get("COOKIES_PATH", "cookies.json")
# Optional outbound proxy URL applied to every upstream Kimi request.
PROXY = os.environ.get("HTTP_PROXY", None)


# Shared worker pool used to drive blocking curl_cffi I/O from async handlers.
_executor = ThreadPoolExecutor(max_workers=16)
|
|
|
|
| def _load_cookies(path: str) -> dict: |
| try: |
| with open(path, 'r', encoding='utf-8') as f: |
| cookies_list = json.load(f) |
| return {c['name']: c['value'] for c in cookies_list} |
| except Exception as e: |
| print(f"Error loading cookies: {e}") |
| return {} |
|
|
|
|
| def _generate_device_id(seed: str) -> str: |
| h = hashlib.sha256(seed.encode()).hexdigest() |
| return str(int(h[:16], 16))[:19] |
|
|
|
|
| def _generate_session_id(seed: str) -> str: |
| h = hashlib.sha256(("session-" + seed).encode()).hexdigest() |
| return str(int(h[:16], 16))[:19] |
|
|
|
|
def pack_connect_message(data: dict) -> bytes:
    """Frame *data* as a Connect-protocol envelope.

    Layout: 1-byte flag (0 = uncompressed) + big-endian u32 payload length,
    followed by the compact JSON payload.
    """
    body = json.dumps(data, separators=(',', ':')).encode('utf-8')
    return struct.pack('>BI', 0, len(body)) + body
|
|
|
|
| def _convert_citations(text: str) -> str: |
| """ๅฐ Kimi ็ [^N^] ๅผ็จๆ ผๅผ่ฝฌๆขไธบ [N]""" |
| return re.sub(r'\[\^(\d+)\^\]', r'[\1]', text) |
|
|
|
|
| def _format_references(refs: list) -> str: |
| """ๅฐๆ็ดขๅผ็จๆ ผๅผๅไธบ markdown ่ๆณจ""" |
| if not refs: |
| return "" |
| lines = ["\n\n---", "**Sources:**"] |
| for ref in refs: |
| base = ref.get("base", {}) |
| title = base.get("title", "") |
| url = base.get("url", "") |
| ref_id = ref.get("id", "") |
| if title and url: |
| lines.append(f"[{ref_id}] [{title}]({url})") |
| return "\n".join(lines) + "\n" |
|
|
|
|
| |
|
|
| def _parse_kimi_frames(buffer: bytes): |
| """่งฃๆ connect ๅธง๏ผ่ฟๅ (events, remaining_buffer)ใ |
| event ็ฑปๅ: |
| - {"type": "text", "content": "..."} |
| - {"type": "tool_status", "name": "...", "status": "..."} |
| - {"type": "search_refs", "refs": [...]} |
| - {"type": "done"} |
| """ |
| events = [] |
| while len(buffer) >= 5: |
| flag, length = struct.unpack_from('>BI', buffer, 0) |
| if len(buffer) < 5 + length: |
| break |
| payload_bytes = buffer[5:5 + length] |
| buffer = buffer[5 + length:] |
|
|
| if flag == 2: |
| try: |
| payload_bytes = gzip.decompress(payload_bytes) |
| except: |
| pass |
|
|
| if flag not in (0, 2): |
| continue |
|
|
| try: |
| data = json.loads(payload_bytes.decode('utf-8')) |
| except Exception as e: |
| print(f"DEBUG: Error decoding frame JSON: {e}") |
| continue |
|
|
| |
| if "done" in data: |
| events.append({"type": "done"}) |
| continue |
|
|
| |
| if "heartbeat" in data: |
| continue |
|
|
| op = data.get("op") |
| if op not in ("set", "append"): |
| continue |
|
|
| |
| if "block" in data and "text" in data["block"]: |
| content = data["block"]["text"].get("content", "") |
| if content: |
| events.append({"type": "text", "content": content}) |
|
|
| |
| if "message" in data and "blocks" in data.get("message", {}): |
| msg_role = data["message"].get("role", "") |
| if msg_role == "assistant": |
| content = "" |
| for block in data["message"]["blocks"]: |
| if "text" in block: |
| content += block["text"].get("content", "") |
| if content: |
| events.append({"type": "text", "content": content}) |
|
|
| |
| if "block" in data and "tool" in data["block"]: |
| tool = data["block"]["tool"] |
| name = tool.get("name", "") |
| status = tool.get("status", "") |
| if name and status: |
| events.append({"type": "tool_status", "name": name, "status": status}) |
|
|
| |
| msg = data.get("message", {}) |
| refs = msg.get("refs", {}) |
| if "usedSearchChunks" in refs: |
| events.append({"type": "search_refs", "refs": refs["usedSearchChunks"]}) |
|
|
| return events, buffer |
|
|
|
|
| |
# Sentinel bearer token: clients presenting this exact key use the server's
# cookie-file identity; any other key is treated as a raw kimi-auth token.
API_KEY = "sk-sseworld-kimi"
|
|
| |
|
|
class KimiBridge:
    """Factory for impersonated curl_cffi sessions targeting the Kimi web API."""

    def __init__(self):
        self.base_url = "https://www.kimi.com"

    def create_session(self, api_key: str):
        """Build a browser-like session for the upstream Kimi API.

        The shared API_KEY selects the server's cookie-file identity; any
        other key is used directly as a kimi-auth token, with a device
        fingerprint derived deterministically from the key itself.
        """
        if api_key == API_KEY:
            jar = _load_cookies(COOKIES_PATH)
            token = jar.get("kimi-auth", "")
            seed = "cookies-default"
        else:
            jar = {}
            token = api_key
            seed = api_key

        dev_id = _generate_device_id(seed)
        sess_id = _generate_session_id(seed)

        # Headers mirroring what the Kimi web client sends from Chrome.
        request_headers = {
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "authorization": f"Bearer {token}",
            "content-type": "application/connect+json",
            "connect-protocol-version": "1",
            "origin": "https://www.kimi.com",
            "referer": "https://www.kimi.com/",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36",
            "x-language": "zh-CN",
            "x-msh-device-id": dev_id,
            "x-msh-platform": "web",
            "x-msh-session-id": sess_id,
            "x-msh-version": "1.0.0",
            "x-traffic-id": f"u{dev_id[:20]}",
        }

        return requests.Session(
            headers=request_headers,
            cookies=jar,
            impersonate="chrome124",
            proxy=PROXY,
        )
|
|
|
|
# Module-level singleton; a fresh upstream session is created per request.
bridge = KimiBridge()
|
|
|
|
| |
|
|
def format_openai_stream_chunk(content: str, model: str, chat_id: str, *, role: Optional[str] = None, finish_reason: Optional[str] = None) -> str:
    """Serialize one OpenAI-style SSE ``chat.completion.chunk`` event line.

    Args:
        content: Delta text; omitted from the delta when empty.
        model: Model id echoed back to the client.
        chat_id: Stable id shared by every chunk of one completion.
        role: Included in the delta only on the first chunk (fix: was
            annotated ``str`` despite defaulting to None).
        finish_reason: "stop" on the final content chunk, else None.

    Returns:
        A ``data: {...}\\n\\n`` SSE line with non-ASCII preserved.
    """
    delta = {}
    if role:
        delta["role"] = role
    if content:
        delta["content"] = content
    chunk = {
        "id": chat_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{
            "index": 0,
            "delta": delta,
            "finish_reason": finish_reason
        }]
    }
    return f"data: {json.dumps(chunk, ensure_ascii=False)}\n\n"
|
|
|
|
| |
|
|
| def _sync_kimi_request(session, url, body_bytes): |
| return session.post(url, data=body_bytes, stream=True, timeout=30) |
|
|
|
|
def _sync_read_all(response):
    """Synchronously drain a streaming Kimi response and return the full text.

    Fix: the old docstring claimed a ``(full_text, search_refs)`` tuple, but
    the function returns a single string (as the caller expects). Citations
    are normalized to ``[N]`` form and, when search references were seen, a
    markdown sources section is appended.
    """
    full_content = ""
    search_refs = []
    buffer = b""
    for chunk in response.iter_content(chunk_size=None):
        if not chunk:
            continue
        buffer += chunk
        # Frames may straddle chunk boundaries; the parser returns leftovers.
        events, buffer = _parse_kimi_frames(buffer)
        for ev in events:
            if ev["type"] == "text":
                full_content += ev["content"]
            elif ev["type"] == "search_refs":
                search_refs = ev["refs"]
    full_content = _convert_citations(full_content)
    if search_refs:
        full_content += _format_references(search_refs)
    return full_content
|
|
|
|
| |
|
|
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log every incoming request line and the resulting response status."""
    print(f"DEBUG: Incoming request: {request.method} {request.url}")
    result = await call_next(request)
    print(f"DEBUG: Response status: {result.status_code}")
    return result
|
|
|
|
# Exposed model id -> upstream scenario name + whether "thinking" mode is on.
KIMI_MODELS = {
    "kimi-k2.5": {"scenario": "SCENARIO_K2D5", "thinking": False},
    "kimi-k2.5-thinking": {"scenario": "SCENARIO_K2D5", "thinking": True},
}
# Fallback used when a client requests an unknown model id.
DEFAULT_MODEL = "kimi-k2.5"
|
|
|
|
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing derived from KIMI_MODELS."""
    entries = [
        {"id": model_id, "object": "model", "created": 0, "owned_by": "moonshot"}
        for model_id in KIMI_MODELS
    ]
    return {"object": "list", "data": entries}
|
|
|
|
@app.post("/v1/chat/completions")
async def chat_completions(request: Request, credentials: HTTPAuthorizationCredentials = Depends(security)):
    """OpenAI-compatible chat endpoint proxied onto the Kimi web API.

    Supports streaming (SSE) and non-streaming responses. The bearer token
    selects the upstream identity (see KimiBridge.create_session).

    Fixes vs. previous revision:
      - ``asyncio.get_running_loop()`` instead of the deprecated
        ``get_event_loop()`` inside a coroutine.
      - the queue poll catches only ``queue.Empty`` instead of a bare except.
      - the upstream session is created after request validation, so the
        400 error paths no longer leak it.
    """
    api_key = credentials.credentials
    print(f"DEBUG: chat_completions endpoint hit, key prefix: {api_key[:6]}...")
    print(f"DEBUG: Request headers: {dict(request.headers)}")

    try:
        body = await request.json()
    except Exception as e:
        print(f"DEBUG: Failed to parse request JSON: {e}")
        raise HTTPException(status_code=400, detail="Invalid JSON body")

    messages = body.get("messages", [])
    model = body.get("model", "kimi-k2.5")
    stream = body.get("stream", False)
    # Unknown model ids silently fall back to the default configuration.
    model_config = KIMI_MODELS.get(model, KIMI_MODELS[DEFAULT_MODEL])

    print(f"DEBUG: Received request: model={model}, thinking={model_config['thinking']}, stream={stream}, messages_count={len(messages)}")

    if not messages:
        raise HTTPException(status_code=400, detail="Messages are required")

    # Created only after validation so early 400s don't leak a session.
    session = bridge.create_session(api_key)

    # Kimi gets one user message here, so flatten the whole chat history
    # into a single role-prefixed transcript.
    kimi_blocks = []
    for msg in messages:
        role = msg.get("role", "user")
        content = msg.get("content", "")
        prefix = "User: " if role == "user" else "Assistant: "
        kimi_blocks.append({"message_id": "", "text": {"content": f"{prefix}{content}\n"}})

    kimi_payload = {
        "scenario": model_config["scenario"],
        "tools": [{"type": "TOOL_TYPE_SEARCH", "search": {}}],
        "message": {
            "role": "user",
            "blocks": kimi_blocks,
            "scenario": model_config["scenario"]
        },
        "options": {"thinking": model_config["thinking"]}
    }

    print(f"DEBUG: Kimi payload size: {len(json.dumps(kimi_payload))}")

    url = f"{bridge.base_url}/apiv2/kimi.gateway.chat.v1.ChatService/Chat"
    body_bytes = pack_connect_message(kimi_payload)

    print(f"DEBUG: Forwarding to Kimi: {url}")

    # Fix: get_event_loop() is deprecated inside coroutines.
    loop = asyncio.get_running_loop()

    try:
        # The blocking curl_cffi call runs in the shared thread pool.
        response = await loop.run_in_executor(_executor, _sync_kimi_request, session, url, body_bytes)
        print(f"DEBUG: Kimi response status: {response.status_code}")
    except Exception as e:
        print(f"DEBUG: Request to Kimi failed: {e}")
        session.close()
        raise HTTPException(status_code=500, detail=f"Failed to connect to Kimi: {str(e)}")

    if response.status_code != 200:
        error_text = response.text
        print(f"DEBUG: Kimi error: {error_text}")
        session.close()
        raise HTTPException(status_code=response.status_code, detail=f"Kimi API error: {error_text}")

    chat_id = str(uuid.uuid4())

    if stream:
        async def generate():
            """SSE generator: bridges the blocking Kimi stream into async chunks."""
            q = queue.Queue()
            sentinel = object()
            sent_role = False

            def _stream_worker():
                # Runs in the thread pool: parse frames and push text onto q.
                try:
                    buf = b""
                    search_refs = []
                    for chunk in response.iter_content(chunk_size=None):
                        if not chunk:
                            continue
                        buf += chunk
                        events, buf = _parse_kimi_frames(buf)
                        for ev in events:
                            if ev["type"] == "text":
                                q.put(("text", _convert_citations(ev["content"])))
                            elif ev["type"] == "tool_status" and ev["status"] == "STATUS_RUNNING":
                                q.put(("text", "\n\n> [Searching...]\n\n"))
                            elif ev["type"] == "search_refs":
                                search_refs = ev["refs"]

                    if search_refs:
                        q.put(("text", _format_references(search_refs)))
                finally:
                    # Always unblock the consumer, even if the worker fails.
                    q.put(sentinel)
                    session.close()

            loop.run_in_executor(_executor, _stream_worker)

            while True:
                try:
                    # Poll with a timeout so the event loop is never parked forever.
                    item = await loop.run_in_executor(None, q.get, True, 0.5)
                except queue.Empty:
                    # Fix: was a bare except; only the timeout is expected here.
                    continue
                if item is sentinel:
                    break
                _, content = item
                if not sent_role:
                    # First chunk carries the assistant role per OpenAI convention.
                    yield format_openai_stream_chunk(content, model, chat_id, role="assistant")
                    sent_role = True
                else:
                    yield format_openai_stream_chunk(content, model, chat_id)

            # Terminal chunk + OpenAI end-of-stream marker.
            yield format_openai_stream_chunk("", model, chat_id, finish_reason="stop")
            yield "data: [DONE]\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")
    else:
        try:
            full_content = await loop.run_in_executor(_executor, _sync_read_all, response)
        finally:
            session.close()

        # Token accounting is not available from the upstream; report zeros.
        return {
            "id": chat_id,
            "object": "chat.completion",
            "created": int(time.time()),
            "model": model,
            "choices": [{
                "index": 0,
                "message": {"role": "assistant", "content": full_content},
                "finish_reason": "stop"
            }],
            "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
        }
|
|
|
|
if __name__ == "__main__":
    # Fix: uvicorn is already imported at the top of the module; the
    # redundant local import is removed. The import string assumes this
    # file is named openai.py.
    uvicorn.run("openai:app", host="0.0.0.0", port=8001, reload=False, log_level="debug")
|
|