proxycf

Sleeping

App Files Files Community

Elysiadev11 committed 10 days ago

Commit

56972cd

verified ·

1 Parent(s): 93a977b

Update proxy_cf.py

Browse files

Files changed (1) hide show

proxy_cf.py +105 -39

proxy_cf.py CHANGED Viewed

@@ -80,10 +80,6 @@ CF_AI_BASE = "https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/v1"
 def cf_base(account_id: str) -> str:
     """Return ``CF_AI_BASE`` with its ``{account_id}`` placeholder filled in."""
     return CF_AI_BASE.format(account_id=account_id)
-def cf_url(account_id: str, model: str) -> str:
-    # Legacy /run endpoint (kept for fallback)
-    return f"https://api.cloudflare.com/client/v4/accounts/{account_id}/ai/run/{model}"
 async def get_key(exclude=None):
     global rr_index
@@ -99,7 +95,7 @@ async def get_key(exclude=None):
             if st["healthy"] and not st["busy"] and kid not in exclude:
                 st["busy"] = True
-                return acc  # returns dict {"account_id": ..., "api_key": ...}
     return None
@@ -137,9 +133,55 @@ async def wait_for_free_key(exclude=None, max_wait=30.0, interval=0.3):
     return None
-def is_rate_limited(status_code: int, text: str) -> bool:
     t = text.lower()
-    return status_code == 429 or "rate limit" in t or "too many requests" in t or "usage limit" in t
 # =====================================================
@@ -175,7 +217,6 @@ async def models(req: Request):
     if not auth_ok(req):
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
-    # Pakai account pertama yang healthy, tidak perlu mark busy
     acc = None
     async with _key_lock:
         for a in CF_ACCOUNTS:
@@ -196,7 +237,6 @@ async def models(req: Request):
         if r.status_code != 200:
             return JSONResponse({"error": f"CF returned {r.status_code}: {r.text}"}, status_code=r.status_code)
-        # CF sudah return OpenAI-compatible format, langsung forward
         return Response(content=r.content, media_type="application/json")
     except Exception as e:
@@ -219,12 +259,10 @@ async def chat(req: Request):
     is_stream = body.get("stream", False)
     model = body.get("model", DEFAULT_CF_MODEL)
-    # Pass body as-is ke CF — CF OpenAI-compatible endpoint terima format sama persis
     cf_body = {**body, "model": model}
     # -----------------------------------------
-    # NON STREAM — forward response langsung
     # -----------------------------------------
     if not is_stream:
         tried = set()
@@ -247,7 +285,10 @@ async def chat(req: Request):
                         }
                     )
-                if is_rate_limited(r.status_code, r.text):
                     log(f"Account {acc['account_id'][:8]}... rate limited (non-stream), trying next")
                     await mark_fail(acc)
                     continue
@@ -258,7 +299,6 @@ async def chat(req: Request):
                     continue
                 await mark_ok(acc)
-                # CF OpenAI-compatible → langsung forward, tidak perlu konversi
                 return Response(content=r.content, media_type="application/json")
             except Exception as e:
@@ -271,7 +311,7 @@ async def chat(req: Request):
         return JSONResponse({"error": "All accounts failed"}, status_code=500)
     # -----------------------------------------
-    # STREAM — CF kirim SSE OpenAI-format, langsung pipe ke client
     # -----------------------------------------
     async def gen():
         tried = set()
@@ -295,7 +335,8 @@ async def chat(req: Request):
                         }
                     ) as r:
-                        if is_rate_limited(r.status_code, ""):
                             log(f"Account {acc['account_id'][:8]}... rate limited (stream), trying next")
                             await mark_fail(acc)
                             continue
@@ -314,14 +355,16 @@ async def chat(req: Request):
                             if line.strip() == "data: [DONE]":
                                 break
-                            # Detect mid-stream rate limit dalam payload
                             raw = line[6:] if line.startswith("data: ") else line
-                            if is_rate_limited(0, raw):
-                                log(f"Account {acc['account_id'][:8]}... mid-stream limit, switching key")
                                 hit_limit = True
                                 break
-                            # CF OpenAI-compatible SSE → pipe langsung ke client
                             yield line + "\n\n"
                         if hit_limit:
@@ -409,7 +452,10 @@ async def anthropic(req: Request):
                         }
                     )
-                if is_rate_limited(r.status_code, r.text):
                     log(f"Account {acc['account_id'][:8]}... rate limited (anthropic non-stream), trying next")
                     await mark_fail(acc)
                     continue
@@ -420,8 +466,7 @@ async def anthropic(req: Request):
                     continue
                 data = r.json()
-                # CF OpenAI-compatible response → konversi ke Anthropic format
-                content_text = data["choices"][0]["message"]["content"]
                 usage = data.get("usage", {})
                 out = {
@@ -456,7 +501,7 @@ async def anthropic(req: Request):
     async def agen():
         tried = set()
         msg_id = "msg_" + uuid.uuid4().hex[:10]
-        sent_any_delta = False
         for _ in range(len(CF_ACCOUNTS)):
             acc = await wait_for_free_key(exclude=tried)
@@ -477,7 +522,8 @@ async def anthropic(req: Request):
                         }
                     ) as r:
-                        if is_rate_limited(r.status_code, ""):
                             log(f"Account {acc['account_id'][:8]}... rate limited (anthropic stream), trying next")
                             await mark_fail(acc)
                             continue
@@ -487,8 +533,9 @@ async def anthropic(req: Request):
                             await mark_fail(acc)
                             continue
-                        # Emit Anthropic envelope sekali saja saat key pertama berhasil
-                        if not sent_any_delta:
                             yield sse({
                                 "type": "message_start",
                                 "message": {
@@ -502,10 +549,11 @@ async def anthropic(req: Request):
                                     "usage": {"input_tokens": 0, "output_tokens": 0}
                                 }
                             })
                             yield sse({
                                 "type": "content_block_start",
                                 "index": 0,
-                                "content_block": {"type": "text"}
                             })
                         hit_limit = False
@@ -518,19 +566,25 @@ async def anthropic(req: Request):
                             raw = line[6:] if line.startswith("data: ") else line
-                            if is_rate_limited(0, raw):
-                                log(f"Account {acc['account_id'][:8]}... mid-stream limit (anthropic), switching key")
-                                hit_limit = True
                                 break
-                            try:
-                                j = json.loads(raw)
-                                token = j["choices"][0]["delta"].get("content", "")
-                            except Exception:
-                                continue
                             if token:
-                                sent_any_delta = True
                                 yield sse({
                                     "type": "content_block_delta",
                                     "index": 0,
@@ -552,6 +606,18 @@ async def anthropic(req: Request):
                 await release_key(acc)
         # Tutup Anthropic SSE envelope
         yield sse({"type": "content_block_stop", "index": 0})
         yield sse({
             "type": "message_delta",
@@ -560,4 +626,4 @@ async def anthropic(req: Request):
         })
         yield sse({"type": "message_stop"})
-    return StreamingResponse(agen(), media_type="text/event-stream")

 def cf_base(account_id: str) -> str:
     """Return ``CF_AI_BASE`` with its ``{account_id}`` placeholder filled in."""
     return CF_AI_BASE.format(account_id=account_id)
 async def get_key(exclude=None):
     global rr_index
             if st["healthy"] and not st["busy"] and kid not in exclude:
                 st["busy"] = True
+                return acc
     return None
     return None
def is_rate_limited_status(status_code: int) -> bool:
    """Return True when the HTTP status code alone signals rate limiting.

    Only ``429 Too Many Requests`` counts; the response body is deliberately
    not inspected here.
    """
    return status_code == 429
def is_rate_limited_error_body(text: str) -> bool:
    """Return True when an error body mentions a rate or usage limit.

    Intended ONLY for the body of a non-200 HTTP response or for a JSON
    error object. Do not call it on model output tokens: a model may
    legitimately write "rate limit" in its answer, which would be a false
    positive.

    The match is case-insensitive. ``None`` or an empty string is treated
    as "not rate limited" instead of raising.
    """
    if not text:
        return False
    lowered = text.lower()
    markers = ("rate limit", "too many requests", "usage limit")
    return any(marker in lowered for marker in markers)
def parse_sse_chunk(raw: str):
    """Parse one SSE data payload in OpenAI-compatible chunk format.

    Args:
        raw: The payload of a single SSE line, with any ``data: `` prefix
            already stripped by the caller.

    Returns:
        A ``(token, is_cf_error, error_text)`` tuple:

        - ``token``: text to stream to the client. It is ``""`` when the
          chunk carries no text (for example during a thinking phase) and
          ``None`` when the chunk is an error.
        - ``is_cf_error``: True when the chunk is an upstream API error
          rather than model output.
        - ``error_text``: the error text when ``is_cf_error`` is True,
          otherwise ``""``.
    """
    stripped = raw.strip() if raw else ""
    # Blank lines separate SSE events and lines starting with ":" are SSE
    # comments (keep-alives); neither is an error nor carries a token.
    if not stripped or stripped.startswith(":"):
        return "", False, ""

    try:
        payload = json.loads(stripped)
    except json.JSONDecodeError:
        # Non-JSON payload: most likely a plain-text error from upstream.
        return None, True, raw

    # Valid JSON that is not an object (number, string, list, null) cannot be
    # a delta chunk; report it as an error instead of crashing on .get().
    if not isinstance(payload, dict):
        return None, True, raw

    # An "error" key without "choices" is an API error object.
    if "error" in payload and "choices" not in payload:
        return None, True, json.dumps(payload)

    # Normal OpenAI-style delta chunk.
    choices = payload.get("choices") or []
    if not choices or not isinstance(choices[0], dict):
        return "", False, ""

    # "delta" may be missing or explicitly null.
    delta = choices[0].get("delta") or {}
    if not isinstance(delta, dict):
        return "", False, ""

    # Main content can be null while a model is still in its thinking phase.
    content = delta.get("content") or ""
    # Some thinking models put reasoning tokens in "reasoning_content" or
    # "reasoning"; include them so the thinking text is streamed as well.
    reasoning = delta.get("reasoning_content") or delta.get("reasoning") or ""
    return reasoning + content, False, ""
 # =====================================================
     if not auth_ok(req):
         return JSONResponse({"error": "Unauthorized"}, status_code=401)
     acc = None
     async with _key_lock:
         for a in CF_ACCOUNTS:
         if r.status_code != 200:
             return JSONResponse({"error": f"CF returned {r.status_code}: {r.text}"}, status_code=r.status_code)
         return Response(content=r.content, media_type="application/json")
     except Exception as e:
     is_stream = body.get("stream", False)
     model = body.get("model", DEFAULT_CF_MODEL)
     cf_body = {**body, "model": model}
     # -----------------------------------------
+    # NON STREAM
     # -----------------------------------------
     if not is_stream:
         tried = set()
                         }
                     )
+                # FIX: cek rate limit hanya dari HTTP status/error body, bukan dari model output
+                if is_rate_limited_status(r.status_code) or (
+                    r.status_code != 200 and is_rate_limited_error_body(r.text)
+                ):
                     log(f"Account {acc['account_id'][:8]}... rate limited (non-stream), trying next")
                     await mark_fail(acc)
                     continue
                     continue
                 await mark_ok(acc)
                 return Response(content=r.content, media_type="application/json")
             except Exception as e:
         return JSONResponse({"error": "All accounts failed"}, status_code=500)
     # -----------------------------------------
+    # STREAM — pipe OpenAI SSE langsung ke client
     # -----------------------------------------
     async def gen():
         tried = set()
                         }
                     ) as r:
+                        # FIX: hanya cek status code untuk rate limit di sini
+                        if is_rate_limited_status(r.status_code):
                             log(f"Account {acc['account_id'][:8]}... rate limited (stream), trying next")
                             await mark_fail(acc)
                             continue
                             if line.strip() == "data: [DONE]":
                                 break
                             raw = line[6:] if line.startswith("data: ") else line
+                            # FIX: gunakan parse_sse_chunk, cek error hanya pada CF error object
+                            # — jangan cek kata "rate limit" pada konten model
+                            _, is_cf_err, err_text = parse_sse_chunk(raw)
+                            if is_cf_err and is_rate_limited_error_body(err_text):
+                                log(f"Account {acc['account_id'][:8]}... mid-stream CF error, switching key")
                                 hit_limit = True
                                 break
                             yield line + "\n\n"
                         if hit_limit:
                         }
                     )
+                # FIX: cek rate limit hanya dari HTTP status/error body
+                if is_rate_limited_status(r.status_code) or (
+                    r.status_code != 200 and is_rate_limited_error_body(r.text)
+                ):
                     log(f"Account {acc['account_id'][:8]}... rate limited (anthropic non-stream), trying next")
                     await mark_fail(acc)
                     continue
                     continue
                 data = r.json()
+                content_text = data["choices"][0]["message"]["content"] or ""
                 usage = data.get("usage", {})
                 out = {
     async def agen():
         tried = set()
         msg_id = "msg_" + uuid.uuid4().hex[:10]
+        envelope_sent = False
         for _ in range(len(CF_ACCOUNTS)):
             acc = await wait_for_free_key(exclude=tried)
                         }
                     ) as r:
+                        # FIX: hanya cek status code untuk rate limit
+                        if is_rate_limited_status(r.status_code):
                             log(f"Account {acc['account_id'][:8]}... rate limited (anthropic stream), trying next")
                             await mark_fail(acc)
                             continue
                             await mark_fail(acc)
                             continue
+                        # Kirim Anthropic envelope hanya sekali
+                        if not envelope_sent:
+                            envelope_sent = True
                             yield sse({
                                 "type": "message_start",
                                 "message": {
                                     "usage": {"input_tokens": 0, "output_tokens": 0}
                                 }
                             })
+                            # FIX: tambah "text": "" sesuai spec Anthropic
                             yield sse({
                                 "type": "content_block_start",
                                 "index": 0,
+                                "content_block": {"type": "text", "text": ""}
                             })
                         hit_limit = False
                             raw = line[6:] if line.startswith("data: ") else line
+                            # =============================================
+                            # FIX UTAMA: parse chunk dulu, baru cek error
+                            # JANGAN cek is_rate_limited pada teks model!
+                            # Ini penyebab response berhenti di tengah karena
+                            # model nulis kata "rate limit" / "too many requests"
+                            # dalam output / thinking-nya.
+                            # =============================================
+                            token, is_cf_err, err_text = parse_sse_chunk(raw)
+                            if is_cf_err:
+                                if is_rate_limited_error_body(err_text):
+                                    log(f"Account {acc['account_id'][:8]}... mid-stream CF rate limit, switching key")
+                                    hit_limit = True
+                                else:
+                                    log(f"Account {acc['account_id'][:8]}... mid-stream CF error: {err_text[:120]}")
                                 break
+                            # token "" → thinking phase tanpa content, skip saja
                             if token:
                                 yield sse({
                                     "type": "content_block_delta",
                                     "index": 0,
                 await release_key(acc)
         # Tutup Anthropic SSE envelope
+        # Edge case: semua account gagal sebelum sempat kirim envelope
+        if not envelope_sent:
+            yield sse({
+                "type": "message_start",
+                "message": {
+                    "id": msg_id, "type": "message", "role": "assistant",
+                    "model": model, "content": [], "stop_reason": None,
+                    "stop_sequence": None, "usage": {"input_tokens": 0, "output_tokens": 0}
+                }
+            })
+            yield sse({"type": "content_block_start", "index": 0, "content_block": {"type": "text", "text": ""}})
         yield sse({"type": "content_block_stop", "index": 0})
         yield sse({
             "type": "message_delta",
         })
         yield sse({"type": "message_stop"})
+    return StreamingResponse(agen(), media_type="text/event-stream")