Spaces:
Building
Building
"""axentx coder-zero-gpu-1 — proxy to HF Inference Router for Qwen3-Coder.

No model loading on Space (avoid GPU init issues). Just a thin wrapper
that forwards to HF Router with the Space owner's token. Adds independent
rate-limit bucket for the pipeline.
"""
import json
import os
import urllib.request

import gradio as gr
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Token is auto-injected by the Space runtime; falls back to "" if unset.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Upstream model id forwarded to the router; overridable via MODEL_ID.
MODEL = os.environ.get("MODEL_ID", "Qwen/Qwen3-Coder-30B-A3B-Instruct")
def _call_hf_router(messages, max_tokens=1024, temperature=0.3):
    """POST a chat-completions request to the HF Inference Router.

    Sends *messages* (OpenAI-style role/content dicts) for the configured
    MODEL, authenticated with the Space's HF_TOKEN, and returns the decoded
    JSON response. Network/HTTP errors propagate to the caller.
    """
    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }
    request = urllib.request.Request(
        "https://router.huggingface.co/v1/chat/completions",
        data=json.dumps(payload).encode(),
        method="POST",
        headers=headers,
    )
    # 60 s cap so a stalled upstream doesn't hang the proxy worker forever.
    with urllib.request.urlopen(request, timeout=60) as resp:
        return json.loads(resp.read())
# FastAPI app with fully permissive CORS so any frontend origin can call
# the proxy endpoints.
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
class ChatRequest(BaseModel):
    """OpenAI-style chat-completion request body accepted by the proxy."""

    # Conversation turns as {"role": ..., "content": ...} dicts.
    messages: list
    max_tokens: int = 1024
    temperature: float = 0.3
    # Advertised model alias; the actual upstream model is module-level MODEL.
    model: str = "axentx-coder-1"
def chat(req: ChatRequest):
    """Forward a ChatRequest to the HF Router and return its raw JSON reply.

    NOTE(review): no @app.post(...) decorator is visible on this handler,
    so as written it is never registered as a FastAPI route — the decorator
    may have been lost in extraction; confirm against the deployed Space.
    """
    return _call_hf_router(
        req.messages,
        max_tokens=req.max_tokens,
        temperature=req.temperature,
    )
def h():
    """Health probe: report proxy status, backend kind, and upstream model.

    NOTE(review): no @app.get(...) decorator is visible, so this is not
    registered as a route in the code shown — confirm whether one was
    dropped during extraction.
    """
    # Same key order as before so the serialized JSON is unchanged.
    return dict(status="ok", backend="hf-router", model=MODEL)
def _ui(message, history):
    """Gradio chat callback: replay history plus the new user turn.

    Skips history entries without a truthy "role" key, appends *message*
    as the user turn, forwards the transcript to the router, and returns
    the assistant's text.
    """
    msgs = []
    for turn in history or []:
        if turn.get("role"):
            msgs.append({"role": turn["role"], "content": turn["content"]})
    msgs.append({"role": "user", "content": message})
    reply = _call_hf_router(msgs)
    return reply["choices"][0]["message"]["content"]
# Gradio chat UI (messages-format history) mounted at "/" on the FastAPI app.
demo = gr.ChatInterface(
    _ui,
    title=f"axentx Coder Proxy — {MODEL}",
    type="messages",
)
app = gr.mount_gradio_app(app, demo, path="/")