axentx-deploy
redeploy: simple gradio proxy
2cfc50a
"""axentx coder-zero-gpu — thin proxy to HF Inference Router.
Why simple gradio? Previous attempts mixed FastAPI + gradio mount which
crashed on zero-a10g startup. This version is pure gradio app — gradio
auto-exposes a Predict API at /api/predict that we call from the LLM
chain. ZeroGPU not actually used (we don't load a model — just proxy)
so any tier works.
"""
import json
import os
import urllib.request
import gradio as gr
# HF_TOKEN must be set as a Space secret; router requests fail without it.
HF_TOKEN = os.environ.get("HF_TOKEN", "")
# Router model id, overridable via the MODEL_ID env var.
MODEL = os.environ.get("MODEL_ID", "Qwen/Qwen3-Coder-30B-A3B-Instruct")
def chat(prompt: str, system: str = "", max_tokens: int = 1024,
         temperature: float = 0.3) -> str:
    """Send one chat-completion request to the HF Router and return the text."""
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    # OpenAI-compatible payload; gr.Number delivers floats, so cast explicitly.
    body = json.dumps({
        "model": MODEL,
        "messages": messages,
        "max_tokens": int(max_tokens),
        "temperature": float(temperature),
    }).encode()
    req = urllib.request.Request(
        "https://router.huggingface.co/v1/chat/completions",
        data=body, method="POST",
        headers={"Authorization": f"Bearer {HF_TOKEN}",
                 "Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            d = json.loads(r.read())
            # Standard chat-completions response shape:
            # {"choices": [{"message": {"content": "..."}}], ...}
            return d["choices"][0]["message"]["content"]
    except Exception as e:
        # Surface the error as text so the caller sees it instead of a 500.
        return f"ERROR: {type(e).__name__}: {e}"
# Simple gradio app. The Space's auto-exposed API at /api/predict accepts
# {"data": [prompt, system, max_tokens, temperature]} and returns
# {"data": [output]}; a client sketch is at the bottom of this file.
demo = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(label="prompt", lines=4),
        gr.Textbox(label="system", lines=2, value=""),
        gr.Number(label="max_tokens", value=1024),
        gr.Number(label="temperature", value=0.3),
    ],
    outputs=gr.Textbox(label="output", lines=10),
    title="axentx coder zero-gpu",
    description="Thin proxy to HF Router. Use /api/predict from LLM chain.",
)
if __name__ == "__main__":
demo.queue().launch(server_name="0.0.0.0", server_port=7860)
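
# Client sketch, not executed by the app: one way the LLM chain could call
# this Space over the /api/predict contract described above. The Space URL
# is hypothetical, and newer gradio releases route API calls through
# /call/<api_name> (or gradio_client) instead, so verify the route against
# the deployed gradio version.
#
#   import json, urllib.request
#   SPACE = "https://example-axentx-coder.hf.space"  # hypothetical URL
#   payload = json.dumps(
#       {"data": ["write fizzbuzz in python", "", 512, 0.2]}).encode()
#   req = urllib.request.Request(
#       f"{SPACE}/api/predict", data=payload, method="POST",
#       headers={"Content-Type": "application/json"})
#   with urllib.request.urlopen(req, timeout=120) as r:
#       print(json.loads(r.read())["data"][0])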