"""axentx coder-zero-gpu-1 — proxy to HF Inference Router for Qwen3-Coder.
No model loading on Space (avoid GPU init issues). Just a thin wrapper
that forwards to HF Router with the Space owner's token. Adds independent
rate-limit bucket for the pipeline.
"""
import os
import json
import urllib.request

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import gradio as gr

HF_TOKEN = os.environ.get("HF_TOKEN", "")  # set automatically by the Space
MODEL = os.environ.get("MODEL_ID", "Qwen/Qwen3-Coder-30B-A3B-Instruct")

def _call_hf_router(messages, max_tokens=1024, temperature=0.3):
    """POST an OpenAI-style chat request to the HF Router and return its JSON."""
    body = json.dumps({
        "model": MODEL, "messages": messages,
        "max_tokens": max_tokens, "temperature": temperature,
    }).encode()
    req = urllib.request.Request(
        "https://router.huggingface.co/v1/chat/completions",
        data=body, method="POST",
        headers={"Authorization": f"Bearer {HF_TOKEN}",
                 "Content-Type": "application/json"},
    )
    # Router errors (HTTP 4xx/5xx) surface here as urllib.error.HTTPError.
    with urllib.request.urlopen(req, timeout=60) as r:
        return json.loads(r.read())
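
# For reference, the router's reply is OpenAI-compatible; the only fields this
# file relies on are:
#   {"choices": [{"message": {"role": "assistant", "content": "..."}}], ...}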

app = FastAPI()
# Allow cross-origin calls from any client.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"], allow_methods=["*"], allow_headers=["*"],
)

class ChatRequest(BaseModel):
    messages: list
    max_tokens: int = 1024
    temperature: float = 0.3
    model: str = "axentx-coder-1"  # ignored; the router call always uses MODEL

@app.post("/v1/chat/completions")
def chat(req: ChatRequest):
    """OpenAI-compatible chat endpoint; returns the router's response verbatim."""
    return _call_hf_router(req.messages, req.max_tokens, req.temperature)
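
# Hypothetical client sketch (not run by the Space itself) showing the request
# shape the endpoint above accepts; SPACE_URL is a placeholder for the deployed
# Space's URL:
#
#   import json, urllib.request
#   req = urllib.request.Request(
#       f"{SPACE_URL}/v1/chat/completions",
#       data=json.dumps({"messages": [{"role": "user", "content": "hi"}]}).encode(),
#       headers={"Content-Type": "application/json"}, method="POST",
#   )
#   print(json.load(urllib.request.urlopen(req)))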

@app.get("/health")
def health():
    return {"status": "ok", "backend": "hf-router", "model": MODEL}

def _ui(message, history):
    """Gradio chat handler: replay the messages-format history, append the new turn."""
    msgs = [{"role": h["role"], "content": h["content"]}
            for h in (history or []) if h.get("role")]
    msgs.append({"role": "user", "content": message})
    r = _call_hf_router(msgs)
    return r["choices"][0]["message"]["content"]

demo = gr.ChatInterface(_ui, title=f"axentx Coder Proxy — {MODEL}", type="messages")
app = gr.mount_gradio_app(app, demo, path="/")
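
# Because the endpoint mirrors the OpenAI chat-completions shape, any
# OpenAI-compatible client should work as well; a sketch, assuming the standard
# `openai` package (not a dependency of this Space), with "<space>.hf.space" as
# a placeholder host:
#
#   from openai import OpenAI
#   client = OpenAI(base_url="https://<space>.hf.space/v1", api_key="unused")
#   out = client.chat.completions.create(
#       model="axentx-coder-1",
#       messages=[{"role": "user", "content": "Refactor this loop."}],
#   )
#   print(out.choices[0].message.content)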