| """axentx coder-zero-gpu — thin proxy to HF Inference Router. | |
| Why simple gradio? Previous attempts mixed FastAPI + gradio mount which | |
| crashed on zero-a10g startup. This version is pure gradio app — gradio | |
| auto-exposes a Predict API at /api/predict that we call from the LLM | |
| chain. ZeroGPU not actually used (we don't load a model — just proxy) | |
| so any tier works. | |
| """ | |
import json
import os
import urllib.request

import gradio as gr

HF_TOKEN = os.environ.get("HF_TOKEN", "")
MODEL = os.environ.get("MODEL_ID", "Qwen/Qwen3-Coder-30B-A3B-Instruct")
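
# Example configuration (hypothetical values; on Spaces, set these as
# secrets/variables in the Space settings rather than hard-coding them):
#   HF_TOKEN=hf_xxxxxxxx   (router auth; requests fail without it)
#   MODEL_ID=Qwen/Qwen3-Coder-30B-A3B-Instruct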


def chat(prompt: str, system: str = "", max_tokens: int = 1024,
         temperature: float = 0.3) -> str:
    """Send one chat-completion request to the HF Router; return the reply text."""
    messages = []
    if system:
        messages.append({"role": "system", "content": system})
    messages.append({"role": "user", "content": prompt})
    body = json.dumps({
        "model": MODEL,
        "messages": messages,
        "max_tokens": int(max_tokens),      # gr.Number hands us floats; coerce
        "temperature": float(temperature),
    }).encode()
    req = urllib.request.Request(
        "https://router.huggingface.co/v1/chat/completions",
        data=body, method="POST",
        headers={"Authorization": f"Bearer {HF_TOKEN}",
                 "Content-Type": "application/json"})
    try:
        with urllib.request.urlopen(req, timeout=60) as r:
            d = json.loads(r.read())
        return d["choices"][0]["message"]["content"]
    except Exception as e:
        # Return errors as text so they show up in the output box / API reply.
        return f"ERROR: {type(e).__name__}: {e}"

# Plain Gradio app. The Space's auto-exposed API at /api/predict accepts
# {"data": [prompt, system, max_tokens, temperature]} and returns {"data": [output]}.
demo = gr.Interface(
    fn=chat,
    inputs=[
        gr.Textbox(label="prompt", lines=4),
        gr.Textbox(label="system", lines=2, value=""),
        gr.Number(label="max_tokens", value=1024),
        gr.Number(label="temperature", value=0.3),
    ],
    outputs=gr.Textbox(label="output", lines=10),
    title="axentx coder zero-gpu",
    description="Thin proxy to HF Router. Use /api/predict from LLM chain.",
)

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
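
# ---------------------------------------------------------------------------
# Example caller (a sketch of how the LLM chain might hit the endpoint under
# the contract documented above; the Space URL is a hypothetical placeholder):
#
#   import json, urllib.request
#
#   url = "https://USER-SPACE.hf.space/api/predict"  # hypothetical URL
#   body = json.dumps(
#       {"data": ["Write a haiku about GPUs.", "", 128, 0.3]}).encode()
#   req = urllib.request.Request(url, data=body, method="POST",
#                                headers={"Content-Type": "application/json"})
#   with urllib.request.urlopen(req, timeout=120) as r:
#       print(json.loads(r.read())["data"][0])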