infinityonline committed on
Commit
d04ae9a
ยท
verified ยท
1 Parent(s): f891dab

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +232 -227
main.py CHANGED
@@ -5,7 +5,6 @@ import asyncio
5
  import threading
6
  import json
7
  import re
8
- import httpx
9
  from typing import Optional
10
  from fastapi import FastAPI, Request
11
  from fastapi.responses import JSONResponse, StreamingResponse
@@ -15,22 +14,22 @@ from fastapi.responses import JSONResponse, StreamingResponse
15
  # ====================================================================
16
  API_SECRET_KEY = os.getenv("API_SECRET_KEY", "change-me-secret")
17
 
18
- # โ”€โ”€ Duck.ai models (HTTP ู…ุจุงุดุฑ - ุจุฏูˆู† Playwright) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
19
  DUCK_MODELS = {
20
- "gpt-4o-mini": "gpt-4o-mini",
21
- "gpt-5-mini": "gpt-5-mini",
22
- "o3-mini": "o3-mini",
23
- "gpt-oss-120b": "gpt-oss-120b",
24
- "claude-haiku-4-5": "claude-haiku-4-5",
25
- "claude-3-haiku-20240307": "claude-3-haiku-20240307",
26
- "llama-4-scout": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
27
- "meta-llama/Llama-3.3-70B-Instruct-Turbo": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
28
- "mistral-small-4": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
29
- "mistralai/Mistral-Small-24B-Instruct-2501": "mistralai/Mistral-Small-24B-Instruct-2501",
30
- "mistralai/Mixtral-8x7B-Instruct-v0.1": "mistralai/Mixtral-8x7B-Instruct-v0.1",
31
  }
32
 
33
- # โ”€โ”€ ZAI models (Playwright - browser scraping) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
34
  ZAI_MODELS = [
35
  "GLM-5.1", "GLM-5-Turbo", "GLM-5V-Turbo",
36
  "GLM-5", "GLM-4.7", "GLM-4.6V", "GLM-4.5-Air"
@@ -38,133 +37,11 @@ ZAI_MODELS = [
38
 
39
  ALL_MODELS = list(DUCK_MODELS.keys()) + ZAI_MODELS
40
 
41
- DUCK_STATUS_URL = "https://duckduckgo.com/duckchat/v1/status"
42
- DUCK_CHAT_URL = "https://duckduckgo.com/duckchat/v1/chat"
43
-
44
- DUCK_HEADERS_BASE = {
45
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
46
- "Accept-Language": "en-US,en;q=0.9",
47
- "Origin": "https://duckduckgo.com",
48
- "Referer": "https://duckduckgo.com/",
49
- }
50
 
51
  # ====================================================================
52
- # Duck.ai HTTP Client
53
- # ====================================================================
54
-
55
- async def _get_vqd_token() -> str:
56
- """ุงู„ุญุตูˆู„ ุนู„ู‰ ุฑู…ุฒ VQD ู…ู† duck.ai - ู…ุทู„ูˆุจ ู„ูƒู„ ู…ุญุงุฏุซุฉ ุฌุฏูŠุฏุฉ"""
57
- headers = {**DUCK_HEADERS_BASE, "x-vqd-accept": "1"}
58
- async with httpx.AsyncClient(timeout=30) as client:
59
- r = await client.get(DUCK_STATUS_URL, headers=headers)
60
- token = r.headers.get("x-vqd-4", "")
61
- if not token:
62
- raise Exception("ูุดู„ ุงู„ุญุตูˆู„ ุนู„ู‰ VQD token ู…ู† duck.ai")
63
- return token
64
-
65
-
66
- def _build_duck_messages(messages: list) -> list:
67
- """ุชุญูˆูŠู„ messages ุฅู„ู‰ ุงู„ุตูŠุบุฉ ุงู„ุชูŠ ุชู‚ุจู„ู‡ุง duck.ai"""
68
- result = []
69
- for m in messages:
70
- role = m.get("role", "user")
71
- # duck.ai ุชุฏุนู… user ูˆ assistant ูู‚ุท - system ูŠุชุญูˆู„ ู„ู€ user
72
- if role == "system":
73
- role = "user"
74
- if role not in ("user", "assistant"):
75
- continue
76
- content = _extract_content(m)
77
- if content.strip():
78
- result.append({"role": role, "content": content})
79
- return result
80
-
81
-
82
- def _extract_content(msg: dict) -> str:
83
- content = msg.get("content", "")
84
- if isinstance(content, list):
85
- parts = []
86
- for item in content:
87
- if isinstance(item, dict):
88
- parts.append(item.get("text", item.get("content", str(item))))
89
- else:
90
- parts.append(str(item))
91
- return "\n".join(parts)
92
- return str(content) if content else ""
93
-
94
-
95
- async def duck_chat_complete(model: str, messages: list) -> str:
96
- """ุงุณุชุฏุนุงุก duck.ai ูˆุฅุฑุฌุงุน ุงู„ู†ุต ุงู„ูƒุงู…ู„"""
97
- duck_model = DUCK_MODELS.get(model, "gpt-4o-mini")
98
- vqd_token = await _get_vqd_token()
99
-
100
- payload = {
101
- "model": duck_model,
102
- "messages": _build_duck_messages(messages),
103
- }
104
- headers = {
105
- **DUCK_HEADERS_BASE,
106
- "Content-Type": "application/json",
107
- "Accept": "text/event-stream",
108
- "x-vqd-4": vqd_token,
109
- }
110
-
111
- async with httpx.AsyncClient(timeout=120) as client:
112
- r = await client.post(DUCK_CHAT_URL, json=payload, headers=headers)
113
- if r.status_code == 429:
114
- raise Exception("duck.ai rate limit - ุญุงูˆู„ ู…ุฑุฉ ุฃุฎุฑู‰ ุจุนุฏ ู‚ู„ูŠู„")
115
- r.raise_for_status()
116
-
117
- full_text = ""
118
- for line in r.text.splitlines():
119
- if line.startswith("data: "):
120
- data = line[6:].strip()
121
- if data == "[DONE]":
122
- break
123
- try:
124
- chunk = json.loads(data)
125
- full_text += chunk.get("message", "")
126
- except Exception:
127
- pass
128
- return full_text.strip()
129
-
130
-
131
- async def duck_chat_stream(model: str, messages: list):
132
- """ุงุณุชุฏุนุงุก duck.ai ุจูˆุถุน streaming - ูŠูุฑุฌุน generator"""
133
- duck_model = DUCK_MODELS.get(model, "gpt-4o-mini")
134
- vqd_token = await _get_vqd_token()
135
-
136
- payload = {
137
- "model": duck_model,
138
- "messages": _build_duck_messages(messages),
139
- }
140
- headers = {
141
- **DUCK_HEADERS_BASE,
142
- "Content-Type": "application/json",
143
- "Accept": "text/event-stream",
144
- "x-vqd-4": vqd_token,
145
- }
146
-
147
- async with httpx.AsyncClient(timeout=120) as client:
148
- async with client.stream("POST", DUCK_CHAT_URL, json=payload, headers=headers) as r:
149
- if r.status_code == 429:
150
- raise Exception("duck.ai rate limit")
151
- r.raise_for_status()
152
- async for line in r.aiter_lines():
153
- if line.startswith("data: "):
154
- data = line[6:].strip()
155
- if data == "[DONE]":
156
- return
157
- try:
158
- chunk = json.loads(data)
159
- token = chunk.get("message", "")
160
- if token:
161
- yield token
162
- except Exception:
163
- pass
164
-
165
-
166
- # ====================================================================
167
- # Playwright Browser Engine (ZAI models)
168
  # ====================================================================
169
 
170
  class AsyncBrowserThread(threading.Thread):
@@ -179,12 +56,12 @@ class AsyncBrowserThread(threading.Thread):
179
  asyncio.set_event_loop(self.loop)
180
  self.loop.run_until_complete(self._start_browser())
181
  self.ready_event.set()
182
- print("[ZAI-SERVER] Browser is ready!")
183
  self.loop.run_forever()
184
 
185
  async def _start_browser(self):
186
  from playwright.async_api import async_playwright
187
- print("[ZAI-SERVER] Starting Chrome...")
188
  self.playwright = await async_playwright().start()
189
  self.browser = await self.playwright.chromium.launch(
190
  headless=True,
@@ -199,9 +76,124 @@ class AsyncBrowserThread(threading.Thread):
199
  "--no-zygote",
200
  ],
201
  )
202
- print("[ZAI-SERVER] Chrome launched!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
- async def _talk_to_zai(self, prompt: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
  context = await self.browser.new_context(
206
  user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
207
  viewport={"width": 1920, "height": 1080},
@@ -218,7 +210,7 @@ class AsyncBrowserThread(threading.Thread):
218
  await page.fill("textarea#chat-input", prompt)
219
  await asyncio.sleep(0.5)
220
  await page.press("textarea#chat-input", "Enter")
221
- print(f"[ZAI-SERVER] Sent ({len(prompt)} chars)")
222
  await asyncio.sleep(2)
223
  await page.wait_for_selector("#response-content-container", timeout=120000)
224
  last_text = ""
@@ -241,16 +233,25 @@ class AsyncBrowserThread(threading.Thread):
241
  last_text = current_text
242
  unchanged_cnt = 0
243
  await asyncio.sleep(1.0)
244
- print(f"[ZAI-SERVER] Response: {len(last_text)} chars")
245
  return last_text.strip()
246
  except Exception as e:
247
- print(f"[ZAI-SERVER] Error: {e}")
248
  raise e
249
  finally:
250
  await page.close()
251
  await context.close()
252
 
253
- def process_request(self, prompt: str):
 
 
 
 
 
 
 
 
 
254
  if not self.ready_event.wait(timeout=60):
255
  raise Exception("Browser not ready")
256
  future = asyncio.run_coroutine_threadsafe(self._talk_to_zai(prompt), self.loop)
@@ -262,9 +263,48 @@ browser_engine.start()
262
 
263
 
264
  # ====================================================================
265
- # Prompt Builder (ู„ู„ู€ ZAI models - ู…ุญููˆุธ ุจุงู„ูƒุงู…ู„)
266
  # ====================================================================
267
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
268
  def format_prompt(messages, tools=None):
269
  parts = []
270
  system_parts = []
@@ -274,16 +314,7 @@ def format_prompt(messages, tools=None):
274
  for msg in messages:
275
  role = msg.get("role", "")
276
  msg_type = msg.get("type", "")
277
- content = msg.get("content", "")
278
-
279
- if isinstance(content, list):
280
- text_parts = []
281
- for item in content:
282
- if isinstance(item, dict):
283
- text_parts.append(item.get("text", item.get("content", str(item))))
284
- else:
285
- text_parts.append(str(item))
286
- content = "\n".join(text_parts)
287
 
288
  if role == "system":
289
  system_parts.append(content)
@@ -301,8 +332,8 @@ def format_prompt(messages, tools=None):
301
  func_args = msg.get("arguments", "{}")
302
  parts.append(f"[PREVIOUS TOOL CALL: Called '{func_name}' with arguments: {func_args}]")
303
  elif role == "assistant":
304
- assistant_content = content if content else ""
305
- tool_calls_in_msg = msg.get("tool_calls", [])
306
  if tool_calls_in_msg:
307
  tc_desc = []
308
  for tc in tool_calls_in_msg:
@@ -326,7 +357,7 @@ def format_prompt(messages, tools=None):
326
  final += "=== SYSTEM INSTRUCTIONS (FOLLOW STRICTLY) ===\n" + "\n\n".join(system_parts) + "\n=== END OF INSTRUCTIONS ===\n\n"
327
 
328
  if tools and not has_tool_results:
329
- final += format_tools_instruction(tools, user_question)
330
 
331
  if has_tool_results:
332
  final += "=== CONTEXT FROM TOOLS ===\nThe following information was retrieved by the tools you requested.\nUse ONLY this information to answer the user's question.\n\n"
@@ -340,7 +371,7 @@ def format_prompt(messages, tools=None):
340
  return final
341
 
342
 
343
- def format_tools_instruction(tools, user_question=""):
344
  instruction = "\n=== MANDATORY TOOL USAGE ===\n"
345
  instruction += "You MUST use one of the tools below to answer this question.\n"
346
  instruction += "Do NOT answer directly. Do NOT say you don't have information.\n"
@@ -349,7 +380,6 @@ def format_tools_instruction(tools, user_question=""):
349
  instruction += '{"tool_calls": [{"name": "TOOL_NAME", "arguments": {"param": "value"}}]}\n\n'
350
  instruction += "RULES:\n- Your ENTIRE response must be valid JSON only\n- No markdown, no code blocks, no explanation\n- No text before or after the JSON\n\n"
351
  instruction += "Available tools:\n\n"
352
-
353
  for tool in tools:
354
  func = tool.get("function", tool)
355
  name = func.get("name", "unknown")
@@ -365,7 +395,6 @@ def format_tools_instruction(tools, user_question=""):
365
  req = "required" if pname in required_params else "optional"
366
  instruction += f" - {pname} ({ptype}, {req}): {pdesc}\n"
367
  instruction += "\n"
368
-
369
  instruction += "=== END OF TOOLS ===\n\n"
370
  first_func = (tools[0] if tools else {}).get("function", tools[0] if tools else {})
371
  first_name = first_func.get("name", "tool")
@@ -380,12 +409,10 @@ def parse_tool_calls(response_text):
380
  m = re.search(r'```(?:json)?\s*\n?(.*?)\n?\s*```', cleaned, re.DOTALL)
381
  if m:
382
  cleaned = m.group(1).strip()
383
-
384
  candidates = [cleaned]
385
  m2 = re.search(r'\{[\s\S]*"tool_calls"[\s\S]*\}', cleaned)
386
  if m2:
387
  candidates.append(m2.group(0))
388
-
389
  for candidate in candidates:
390
  try:
391
  parsed = json.loads(candidate)
@@ -394,8 +421,8 @@ def parse_tool_calls(response_text):
394
  if isinstance(raw_calls, list) and raw_calls:
395
  formatted = []
396
  for call in raw_calls:
397
- tool_name = call.get("name", "")
398
- arguments = call.get("arguments", {})
399
  arguments_str = json.dumps(arguments, ensure_ascii=False) if isinstance(arguments, dict) else str(arguments)
400
  formatted.append({
401
  "id": f"call_{uuid.uuid4().hex[:24]}",
@@ -409,19 +436,19 @@ def parse_tool_calls(response_text):
409
 
410
 
411
  # ====================================================================
412
- # Helpers
413
  # ====================================================================
414
 
415
- def _is_duck_model(model: str) -> bool:
416
- return model in DUCK_MODELS
417
-
418
-
419
  def _auth(request: Request) -> bool:
420
  auth = request.headers.get("authorization", "")
421
  return auth.replace("Bearer ", "").strip() == API_SECRET_KEY
422
 
423
 
424
- def _build_response(start_time, model, response_text, messages, tools, is_duck):
 
 
 
 
425
  p_tokens = sum(len(_extract_content(m).split()) for m in messages)
426
  c_tokens = len(response_text.split())
427
  tool_calls = parse_tool_calls(response_text) if tools else None
@@ -455,7 +482,6 @@ def _build_response(start_time, model, response_text, messages, tools, is_duck):
455
  app = FastAPI(title="ZAI + DuckAI API Server")
456
 
457
 
458
- # โ”€โ”€ POST /v1/chat/completions โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
459
  @app.post("/v1/chat/completions")
460
  async def chat_completions(request: Request):
461
  try:
@@ -472,48 +498,29 @@ async def chat_completions(request: Request):
472
 
473
  model = data.get("model", "gpt-4o-mini")
474
  tools = data.get("tools", None)
475
- do_stream = data.get("stream", False)
476
  start_time = time.time()
477
 
478
  try:
479
- # โ”€โ”€ Duck.ai path โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
480
  if _is_duck_model(model):
481
- if do_stream:
482
- chunk_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
483
-
484
- async def event_stream():
485
- async for token in duck_chat_stream(model, messages):
486
- chunk = {
487
- "id": chunk_id, "object": "chat.completion.chunk",
488
- "created": int(start_time), "model": model,
489
- "choices": [{"index": 0, "delta": {"content": token}, "finish_reason": None}],
490
- }
491
- yield f"data: {json.dumps(chunk)}\n\n"
492
- final = {
493
- "id": chunk_id, "object": "chat.completion.chunk",
494
- "created": int(start_time), "model": model,
495
- "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
496
- }
497
- yield f"data: {json.dumps(final)}\n\n"
498
- yield "data: [DONE]\n\n"
499
-
500
- return StreamingResponse(event_stream(), media_type="text/event-stream")
501
-
502
- response_text = await duck_chat_complete(model, messages)
503
- return _build_response(start_time, model, response_text, messages, tools, is_duck=True)
504
-
505
- # โ”€โ”€ ZAI (Playwright) path โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
506
  else:
507
- prompt = format_prompt(messages, tools=tools)
508
- print(f"[ZAI-SERVER] Processing ({len(prompt)} chars)")
509
- response_text = browser_engine.process_request(prompt)
510
- return _build_response(start_time, model, response_text, messages, tools, is_duck=False)
 
 
 
511
 
512
  except Exception as e:
 
513
  return JSONResponse(status_code=500, content={"error": {"message": str(e)}})
514
 
515
 
516
- # โ”€โ”€ POST /v1/responses โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
517
  @app.post("/v1/responses")
518
  async def responses(request: Request):
519
  try:
@@ -545,26 +552,29 @@ async def responses(request: Request):
545
 
546
  try:
547
  if _is_duck_model(model):
548
- response_text = await duck_chat_complete(model, messages)
 
 
 
549
  else:
550
  prompt = format_prompt(messages, tools=tools)
551
- response_text = browser_engine.process_request(prompt)
 
 
552
 
553
  p_tokens = sum(len(_extract_content(m).split()) for m in messages)
554
  c_tokens = len(response_text.split())
555
  tool_calls = parse_tool_calls(response_text) if tools else None
556
 
557
  if tool_calls:
558
- output_items = []
559
- for tc in tool_calls:
560
- output_items.append({
561
- "type": "function_call",
562
- "id": tc["id"],
563
- "call_id": tc["id"],
564
- "name": tc["function"]["name"],
565
- "arguments": tc["function"]["arguments"],
566
- "status": "completed",
567
- })
568
  return {
569
  "id": f"resp-{uuid.uuid4().hex[:29]}", "object": "response",
570
  "created_at": int(start_time), "model": model, "status": "completed",
@@ -586,7 +596,6 @@ async def responses(request: Request):
586
  return JSONResponse(status_code=500, content={"error": {"message": str(e)}})
587
 
588
 
589
- # โ”€โ”€ GET /v1/models โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
590
  @app.get("/v1/models")
591
  async def list_models(request: Request):
592
  if not _auth(request):
@@ -594,17 +603,13 @@ async def list_models(request: Request):
594
  return {
595
  "object": "list",
596
  "data": [
597
- {
598
- "id": m,
599
- "object": "model",
600
- "owned_by": "duck.ai" if m in DUCK_MODELS else "zai",
601
- }
602
  for m in ALL_MODELS
603
  ],
604
  }
605
 
606
 
607
- # โ”€โ”€ GET /health & GET / โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
608
  @app.get("/health")
609
  @app.get("/")
610
  async def health_check():
@@ -618,4 +623,4 @@ async def health_check():
618
 
619
  if __name__ == "__main__":
620
  import uvicorn
621
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
5
  import threading
6
  import json
7
  import re
 
8
  from typing import Optional
9
  from fastapi import FastAPI, Request
10
  from fastapi.responses import JSONResponse, StreamingResponse
 
14
  # ====================================================================
15
  API_SECRET_KEY = os.getenv("API_SECRET_KEY", "change-me-secret")
16
 
17
+ # โ”€โ”€ Duck.ai models โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
18
+ # ุงู„ู€ key ู‡ูˆ ู…ุง ูŠุฑุณู„ู‡ ุงู„ู…ุณุชุฎุฏู…ุŒ ุงู„ู€ value ู‡ูˆ ID ุงู„ู†ู…ูˆุฐุฌ ููŠ duck.ai
19
  DUCK_MODELS = {
20
+ "gpt-4o-mini": "gpt-4o-mini",
21
+ "gpt-5-mini": "gpt-5-mini",
22
+ "o3-mini": "o3-mini",
23
+ "gpt-oss-120b": "gpt-oss-120b",
24
+ "claude-haiku-4-5": "claude-haiku-4-5",
25
+ "claude-3-haiku": "claude-3-haiku-20240307",
26
+ "llama-4-scout": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
27
+ "llama-3.3-70b": "meta-llama/Llama-3.3-70B-Instruct-Turbo",
28
+ "mistral-small-4": "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
29
+ "mistral-small": "mistralai/Mistral-Small-24B-Instruct-2501",
 
30
  }
31
 
32
+ # โ”€โ”€ ZAI models (Playwright โ†’ chat.z.ai) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
33
  ZAI_MODELS = [
34
  "GLM-5.1", "GLM-5-Turbo", "GLM-5V-Turbo",
35
  "GLM-5", "GLM-4.7", "GLM-4.6V", "GLM-4.5-Air"
 
37
 
38
  ALL_MODELS = list(DUCK_MODELS.keys()) + ZAI_MODELS
39
 
 
 
 
 
 
 
 
 
 
40
 
41
  # ====================================================================
42
+ # Shared Browser Engine (Playwright)
43
+ # ูŠุณุชุฎุฏู…ู‡ ZAI ุนุจุฑ chat.z.ai
44
+ # ูˆูŠุณุชุฎุฏู…ู‡ Duck.ai ุนุจุฑ duck.ai (ู…ุชุตูุญ ุญู‚ูŠู‚ูŠ ูŠุชุฌุงูˆุฒ ุงู„ุญู…ุงูŠุฉ)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  # ====================================================================
46
 
47
  class AsyncBrowserThread(threading.Thread):
 
56
  asyncio.set_event_loop(self.loop)
57
  self.loop.run_until_complete(self._start_browser())
58
  self.ready_event.set()
59
+ print("[SERVER] Browser is ready!")
60
  self.loop.run_forever()
61
 
62
  async def _start_browser(self):
63
  from playwright.async_api import async_playwright
64
+ print("[SERVER] Starting Chrome...")
65
  self.playwright = await async_playwright().start()
66
  self.browser = await self.playwright.chromium.launch(
67
  headless=True,
 
76
  "--no-zygote",
77
  ],
78
  )
79
+ print("[SERVER] Chrome launched!")
80
+
81
+ # โ”€โ”€ Duck.ai via Playwright โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
82
async def _talk_to_duck(self, model_id: str, messages: list) -> str:
    """Drive one duck.ai conversation through the shared headless browser.

    Parameters:
        model_id: duck.ai model identifier (a value from the DUCK_MODELS dict).
        messages: OpenAI-style message dicts; flattened into one text prompt
            by _build_duck_prompt().

    Returns the assistant's reply text, stripped of surrounding whitespace.
    Propagates any Playwright error; the page and context are always closed.
    """
    context = await self.browser.new_context(
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
        viewport={"width": 1920, "height": 1080},
    )
    # Hide the automation flag from the page's JavaScript.
    await context.add_init_script(
        "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
    )
    page = await context.new_page()
    try:
        page.set_default_timeout(120000)

        # Build a single text prompt from the messages list.
        prompt = _build_duck_prompt(messages)

        # Open duck.ai.
        await page.goto("https://duck.ai/", wait_until="domcontentloaded")
        await asyncio.sleep(4)

        # Accept the terms / onboarding dialog if it appears (best effort).
        try:
            accept_btn = page.locator("button:has-text('Accept'), button:has-text('Get Started'), button:has-text('Start chatting')")
            if await accept_btn.count() > 0:
                await accept_btn.first.click()
                await asyncio.sleep(2)
        except Exception:
            pass

        # Select the requested model.
        # duck.ai starts on a default model - switch only if a different one is wanted.
        # NOTE(review): these selectors are heuristics for duck.ai's DOM - confirm
        # against the live page; on failure we silently keep the default model.
        try:
            model_btn = page.locator("[data-testid='model-selector'], button[aria-label*='model'], button[aria-label*='Model'], .model-selector")
            if await model_btn.count() > 0:
                await model_btn.first.click()
                await asyncio.sleep(1)
                # Look for the requested model in the dropdown list.
                model_option = page.locator(f"[data-value='{model_id}'], [value='{model_id}'], li:has-text('{model_id.split('/')[-1]}')")
                if await model_option.count() > 0:
                    await model_option.first.click()
                    await asyncio.sleep(1)
                else:
                    # Close the menu if the model was not found.
                    await page.keyboard.press("Escape")
        except Exception as e:
            print(f"[DUCK] Model selection skipped: {e}")

        # Send the message.
        textarea = page.locator("textarea, [contenteditable='true'], [role='textbox']").first
        await textarea.wait_for(state="visible", timeout=30000)
        await textarea.click()
        await textarea.fill(prompt)
        await asyncio.sleep(0.5)
        await page.keyboard.press("Enter")
        print(f"[DUCK] Sent ({len(prompt)} chars)")

        # Wait for the reply.
        await asyncio.sleep(3)

        # Wait for the reply to start appearing.
        response_selector = "[data-testid='message-assistant'], .message-content, .chat-message-content, [class*='AssistantMessage'], [class*='assistant-message']"
        await page.wait_for_selector(response_selector, timeout=60000)

        # Poll until the reply text stops changing (6 consecutive stable samples).
        last_text = ""
        unchanged_cnt = 0
        while unchanged_cnt < 6:
            current_text = await page.evaluate("""
                () => {
                    // ู…ุญุงูˆู„ุฉ ุนุฏุฉ selectors
                    const selectors = [
                        '[data-testid="message-assistant"]:last-child',
                        '.chat-message--assistant:last-child',
                        '[class*="AssistantMessage"]:last-child',
                        '[class*="assistant"]:last-child [class*="content"]',
                    ];
                    for (const sel of selectors) {
                        const el = document.querySelector(sel);
                        if (el && el.innerText && el.innerText.trim().length > 0) {
                            return el.innerText.trim();
                        }
                    }
                    // fallback: ุขุฎุฑ ุฑุณุงู„ุฉ ููŠ ุงู„ู…ุญุงุฏุซุฉ
                    const allMsgs = document.querySelectorAll(
                        '[class*="message"]:not([class*="user"]):not([class*="User"])'
                    );
                    if (allMsgs.length > 0) {
                        return allMsgs[allMsgs.length - 1].innerText.trim();
                    }
                    return '';
                }
            """)
            if current_text == last_text and current_text.strip():
                unchanged_cnt += 1
            else:
                last_text = current_text
                unchanged_cnt = 0
            await asyncio.sleep(1.2)

        print(f"[DUCK] Response: {len(last_text)} chars")
        return last_text.strip()

    except Exception as e:
        print(f"[DUCK] Error: {e}")
        raise e
    finally:
        await page.close()
        await context.close()
+
195
+ # โ”€โ”€ ZAI via Playwright โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
196
+ async def _talk_to_zai(self, prompt: str) -> str:
197
  context = await self.browser.new_context(
198
  user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36",
199
  viewport={"width": 1920, "height": 1080},
 
210
  await page.fill("textarea#chat-input", prompt)
211
  await asyncio.sleep(0.5)
212
  await page.press("textarea#chat-input", "Enter")
213
+ print(f"[ZAI] Sent ({len(prompt)} chars)")
214
  await asyncio.sleep(2)
215
  await page.wait_for_selector("#response-content-container", timeout=120000)
216
  last_text = ""
 
233
  last_text = current_text
234
  unchanged_cnt = 0
235
  await asyncio.sleep(1.0)
236
+ print(f"[ZAI] Response: {len(last_text)} chars")
237
  return last_text.strip()
238
  except Exception as e:
239
+ print(f"[ZAI] Error: {e}")
240
  raise e
241
  finally:
242
  await page.close()
243
  await context.close()
244
 
245
+ # โ”€โ”€ Public methods โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
246
def process_duck(self, model_id: str, messages: list) -> str:
    """Thread-safe entry point: run a duck.ai chat on the browser's event loop.

    Blocks the calling thread until the reply arrives (up to 180 s).
    Raises if the browser has not become ready within 60 s.
    """
    if not self.ready_event.wait(timeout=60):
        raise Exception("Browser not ready")
    coro = self._talk_to_duck(model_id, messages)
    return asyncio.run_coroutine_threadsafe(coro, self.loop).result(timeout=180)
253
+
254
+ def process_zai(self, prompt: str) -> str:
255
  if not self.ready_event.wait(timeout=60):
256
  raise Exception("Browser not ready")
257
  future = asyncio.run_coroutine_threadsafe(self._talk_to_zai(prompt), self.loop)
 
263
 
264
 
265
  # ====================================================================
266
+ # Prompt Helpers
267
  # ====================================================================
268
 
269
+ def _extract_content(msg: dict) -> str:
270
+ content = msg.get("content", "")
271
+ if isinstance(content, list):
272
+ parts = []
273
+ for item in content:
274
+ if isinstance(item, dict):
275
+ parts.append(item.get("text", item.get("content", str(item))))
276
+ else:
277
+ parts.append(str(item))
278
+ return "\n".join(parts)
279
+ return str(content) if content else ""
280
+
281
+
282
+ def _build_duck_prompt(messages: list) -> str:
283
+ """
284
+ ูŠุจู†ูŠ prompt ู†ุตูŠ ู…ู† messages ู„ุฅุฑุณุงู„ู‡ ู„ู€ duck.ai
285
+ ูŠุฏู…ุฌ system + history + ุงู„ุณุคุงู„ ุงู„ุฃุฎูŠุฑ
286
+ """
287
+ parts = []
288
+ for msg in messages:
289
+ role = msg.get("role", "user")
290
+ content = _extract_content(msg)
291
+ if not content.strip():
292
+ continue
293
+ if role == "system":
294
+ parts.append(f"[INSTRUCTIONS]: {content}")
295
+ elif role == "assistant":
296
+ parts.append(f"[Previous AI response]: {content}")
297
+ else:
298
+ parts.append(content)
299
+
300
+ # ุงู„ุฑุณุงู„ุฉ ุงู„ุฃุฎูŠุฑุฉ ูู‚ุท ู‡ูŠ ุงู„ุณุคุงู„ ุงู„ูุนู„ูŠ
301
+ if len(parts) > 1:
302
+ context = "\n\n".join(parts[:-1])
303
+ question = parts[-1]
304
+ return f"{context}\n\n---\n\n{question}"
305
+ return "\n\n".join(parts)
306
+
307
+
308
  def format_prompt(messages, tools=None):
309
  parts = []
310
  system_parts = []
 
314
  for msg in messages:
315
  role = msg.get("role", "")
316
  msg_type = msg.get("type", "")
317
+ content = _extract_content(msg)
 
 
 
 
 
 
 
 
 
318
 
319
  if role == "system":
320
  system_parts.append(content)
 
332
  func_args = msg.get("arguments", "{}")
333
  parts.append(f"[PREVIOUS TOOL CALL: Called '{func_name}' with arguments: {func_args}]")
334
  elif role == "assistant":
335
+ assistant_content = content
336
+ tool_calls_in_msg = msg.get("tool_calls", [])
337
  if tool_calls_in_msg:
338
  tc_desc = []
339
  for tc in tool_calls_in_msg:
 
357
  final += "=== SYSTEM INSTRUCTIONS (FOLLOW STRICTLY) ===\n" + "\n\n".join(system_parts) + "\n=== END OF INSTRUCTIONS ===\n\n"
358
 
359
  if tools and not has_tool_results:
360
+ final += _format_tools_instruction(tools, user_question)
361
 
362
  if has_tool_results:
363
  final += "=== CONTEXT FROM TOOLS ===\nThe following information was retrieved by the tools you requested.\nUse ONLY this information to answer the user's question.\n\n"
 
371
  return final
372
 
373
 
374
+ def _format_tools_instruction(tools, user_question=""):
375
  instruction = "\n=== MANDATORY TOOL USAGE ===\n"
376
  instruction += "You MUST use one of the tools below to answer this question.\n"
377
  instruction += "Do NOT answer directly. Do NOT say you don't have information.\n"
 
380
  instruction += '{"tool_calls": [{"name": "TOOL_NAME", "arguments": {"param": "value"}}]}\n\n'
381
  instruction += "RULES:\n- Your ENTIRE response must be valid JSON only\n- No markdown, no code blocks, no explanation\n- No text before or after the JSON\n\n"
382
  instruction += "Available tools:\n\n"
 
383
  for tool in tools:
384
  func = tool.get("function", tool)
385
  name = func.get("name", "unknown")
 
395
  req = "required" if pname in required_params else "optional"
396
  instruction += f" - {pname} ({ptype}, {req}): {pdesc}\n"
397
  instruction += "\n"
 
398
  instruction += "=== END OF TOOLS ===\n\n"
399
  first_func = (tools[0] if tools else {}).get("function", tools[0] if tools else {})
400
  first_name = first_func.get("name", "tool")
 
409
  m = re.search(r'```(?:json)?\s*\n?(.*?)\n?\s*```', cleaned, re.DOTALL)
410
  if m:
411
  cleaned = m.group(1).strip()
 
412
  candidates = [cleaned]
413
  m2 = re.search(r'\{[\s\S]*"tool_calls"[\s\S]*\}', cleaned)
414
  if m2:
415
  candidates.append(m2.group(0))
 
416
  for candidate in candidates:
417
  try:
418
  parsed = json.loads(candidate)
 
421
  if isinstance(raw_calls, list) and raw_calls:
422
  formatted = []
423
  for call in raw_calls:
424
+ tool_name = call.get("name", "")
425
+ arguments = call.get("arguments", {})
426
  arguments_str = json.dumps(arguments, ensure_ascii=False) if isinstance(arguments, dict) else str(arguments)
427
  formatted.append({
428
  "id": f"call_{uuid.uuid4().hex[:24]}",
 
436
 
437
 
438
  # ====================================================================
439
+ # Auth & Response Builder
440
  # ====================================================================
441
 
 
 
 
 
442
  def _auth(request: Request) -> bool:
443
  auth = request.headers.get("authorization", "")
444
  return auth.replace("Bearer ", "").strip() == API_SECRET_KEY
445
 
446
 
447
+ def _is_duck_model(model: str) -> bool:
448
+ return model in DUCK_MODELS
449
+
450
+
451
+ def _make_completion(start_time, model, response_text, messages, tools):
452
  p_tokens = sum(len(_extract_content(m).split()) for m in messages)
453
  c_tokens = len(response_text.split())
454
  tool_calls = parse_tool_calls(response_text) if tools else None
 
482
  app = FastAPI(title="ZAI + DuckAI API Server")
483
 
484
 
 
485
  @app.post("/v1/chat/completions")
486
  async def chat_completions(request: Request):
487
  try:
 
498
 
499
  model = data.get("model", "gpt-4o-mini")
500
  tools = data.get("tools", None)
 
501
  start_time = time.time()
502
 
503
  try:
 
504
  if _is_duck_model(model):
505
+ duck_model_id = DUCK_MODELS[model]
506
+ print(f"[SERVER] Duck.ai request โ†’ {duck_model_id}")
507
+ response_text = await asyncio.get_event_loop().run_in_executor(
508
+ None, browser_engine.process_duck, duck_model_id, messages
509
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
510
  else:
511
+ prompt = format_prompt(messages, tools=tools)
512
+ print(f"[SERVER] ZAI request ({len(prompt)} chars)")
513
+ response_text = await asyncio.get_event_loop().run_in_executor(
514
+ None, browser_engine.process_zai, prompt
515
+ )
516
+
517
+ return _make_completion(start_time, model, response_text, messages, tools)
518
 
519
  except Exception as e:
520
+ print(f"[SERVER] Error: {e}")
521
  return JSONResponse(status_code=500, content={"error": {"message": str(e)}})
522
 
523
 
 
524
  @app.post("/v1/responses")
525
  async def responses(request: Request):
526
  try:
 
552
 
553
  try:
554
  if _is_duck_model(model):
555
+ duck_model_id = DUCK_MODELS[model]
556
+ response_text = await asyncio.get_event_loop().run_in_executor(
557
+ None, browser_engine.process_duck, duck_model_id, messages
558
+ )
559
  else:
560
  prompt = format_prompt(messages, tools=tools)
561
+ response_text = await asyncio.get_event_loop().run_in_executor(
562
+ None, browser_engine.process_zai, prompt
563
+ )
564
 
565
  p_tokens = sum(len(_extract_content(m).split()) for m in messages)
566
  c_tokens = len(response_text.split())
567
  tool_calls = parse_tool_calls(response_text) if tools else None
568
 
569
  if tool_calls:
570
+ output_items = [{
571
+ "type": "function_call",
572
+ "id": tc["id"],
573
+ "call_id": tc["id"],
574
+ "name": tc["function"]["name"],
575
+ "arguments": tc["function"]["arguments"],
576
+ "status": "completed",
577
+ } for tc in tool_calls]
 
 
578
  return {
579
  "id": f"resp-{uuid.uuid4().hex[:29]}", "object": "response",
580
  "created_at": int(start_time), "model": model, "status": "completed",
 
596
  return JSONResponse(status_code=500, content={"error": {"message": str(e)}})
597
 
598
 
 
599
  @app.get("/v1/models")
600
  async def list_models(request: Request):
601
  if not _auth(request):
 
603
  return {
604
  "object": "list",
605
  "data": [
606
+ {"id": m, "object": "model",
607
+ "owned_by": "duck.ai" if m in DUCK_MODELS else "zai"}
 
 
 
608
  for m in ALL_MODELS
609
  ],
610
  }
611
 
612
 
 
613
  @app.get("/health")
614
  @app.get("/")
615
  async def health_check():
 
623
 
624
  if __name__ == "__main__":
625
  import uvicorn
626
+ uvicorn.run(app, host="0.0.0.0", port=7860)