Spaces:

surrogate1
/

coder-zero-gpu-1

Starting

File size: 2,016 Bytes

c92f8a7
5af5954
c92f8a7
 
 
5af5954
c92f8a7
5af5954
 
 
c92f8a7
5af5954
c92f8a7
 
5af5954
 
c92f8a7
 
 
 
 
 
 
 
 
 
5af5954
c92f8a7
 
5af5954
 
f261990
c92f8a7
5af5954
 
 
 
 
 
 
 
 
 
c92f8a7
 
5af5954
 
 
c92f8a7
 
5af5954
 
f261990
c92f8a7
 
5af5954
c92f8a7
 
5af5954
 
c92f8a7
5af5954

"""axentx coder-zero-gpu-1 — proxy to HF Inference Router for Qwen3-Coder.

No model is loaded on the Space itself (this avoids GPU init issues). This is
just a thin wrapper that forwards requests to the HF Router using the Space
owner's token. It adds an independent rate-limit bucket for the pipeline.
"""
import json
import os
import urllib.error
import urllib.request

import gradio as gr
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel

# Bearer token placed in the Authorization header of router requests.
# Falls back to an empty string when the variable is unset
# (per the original note, the Space sets it automatically).
HF_TOKEN = os.getenv("HF_TOKEN", "")
# Upstream model identifier; override it with the MODEL_ID environment variable.
MODEL = os.getenv("MODEL_ID", "Qwen/Qwen3-Coder-30B-A3B-Instruct")


def _call_hf_router(messages, max_tokens=1024, temperature=0.3):
    """Send one chat-completion request to the HF Inference Router.

    Args:
        messages: Chat messages, serialized as-is into the JSON request body.
        max_tokens: Value sent in the ``max_tokens`` field of the request.
        temperature: Value sent in the ``temperature`` field of the request.

    Returns:
        The router's response body, decoded from JSON.

    Exceptions raised by ``urlopen`` or ``json.loads`` are not handled here
    and propagate to the caller.
    """
    # The model is always the module-level MODEL, never chosen by the caller.
    payload = {
        "model": MODEL,
        "messages": messages,
        "max_tokens": max_tokens,
        "temperature": temperature,
    }
    request = urllib.request.Request(
        "https://router.huggingface.co/v1/chat/completions",
        data=json.dumps(payload).encode(),
        headers={
            "Authorization": f"Bearer {HF_TOKEN}",
            "Content-Type": "application/json",
        },
        method="POST",
    )
    # Give up after 60 seconds rather than blocking indefinitely.
    with urllib.request.urlopen(request, timeout=60) as response:
        raw_body = response.read()
    return json.loads(raw_body)


# Application object that the route decorators in this module register on.
app = FastAPI()
# CORS is left fully open: every origin, method and header is allowed.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)


class ChatRequest(BaseModel):
    """Request body accepted by the ``/v1/chat/completions`` endpoint."""

    # Conversation to forward. Only validated as a list; presumably
    # OpenAI-style {"role", "content"} dicts — element shape is not checked here.
    messages: list
    # Forwarded to the router as ``max_tokens``.
    max_tokens: int = 1024
    # Forwarded to the router as ``temperature``.
    temperature: float = 0.3
    # Accepted in the request body, but the `chat` handler in this file does
    # not pass it on; the upstream model comes from the module-level MODEL.
    model: str = "axentx-coder-1"


@app.post("/v1/chat/completions")
def chat(req: ChatRequest):
    """Forward a chat-completion request to the HF Router and return its reply.

    Only ``messages``, ``max_tokens`` and ``temperature`` are forwarded;
    ``req.model`` is ignored because ``_call_hf_router`` always sends the
    module-level ``MODEL``.

    Args:
        req: Validated request body.

    Returns:
        The router's JSON response, passed through unchanged.

    Raises:
        HTTPException: With the upstream status code when the router answers
            with an HTTP error, or 502 when the router cannot be reached.
    """
    try:
        return _call_hf_router(req.messages, req.max_tokens, req.temperature)
    except urllib.error.HTTPError as err:
        # Mirror the upstream status (e.g. 401, 429) instead of letting it
        # surface as an opaque 500, so clients can react to rate limits.
        detail = err.read().decode("utf-8", errors="replace")
        raise HTTPException(
            status_code=err.code, detail=detail or str(err.reason)
        ) from err
    except OSError as err:
        # URLError and socket timeouts are OSError subclasses. This clause
        # must come after HTTPError, which is itself a URLError.
        raise HTTPException(
            status_code=502, detail=f"HF Router unreachable: {err}"
        ) from err


@app.get("/health")
def h():
    """Return a fixed status payload naming the backend and configured model."""
    payload = dict(status="ok", backend="hf-router", model=MODEL)
    return payload


def _ui(message, history):
    """Chat callback for the Gradio interface.

    Args:
        message: The new user message.
        history: Earlier turns as dict-like entries, or a falsy value when
            there are none. Entries without a truthy ``role`` are skipped.

    Returns:
        The ``content`` of the first choice in the router's response.
    """
    conversation = []
    for turn in history or []:
        if not turn.get("role"):
            continue
        # Keep only role and content; any other keys on the entry are dropped.
        conversation.append({"role": turn["role"], "content": turn["content"]})
    conversation.append({"role": "user", "content": message})

    reply = _call_hf_router(conversation)
    first_choice = reply["choices"][0]
    return first_choice["message"]["content"]


# Chat UI driven by `_ui`; history is exchanged in the "messages" format.
demo = gr.ChatInterface(
    _ui,
    type="messages",
    title=f"axentx Coder Proxy — {MODEL}",
)
# Mount the UI at the site root; `app` is rebound to the app Gradio returns.
app = gr.mount_gradio_app(app, demo, path="/")