import json
import os

from fastapi import Request
from fastapi.responses import HTMLResponse, StreamingResponse
from gradio import Server
from openai import OpenAI

# OpenAI-compatible client pointed at the Hugging Face inference router.
# NOTE(review): api_key defaults to "" when HF_TOKEN is unset — requests will
# fail with an auth error at call time rather than at startup; confirm this is
# the intended failure mode.
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.environ.get("HF_TOKEN", ""),
    default_headers={"X-HF-Bill-To": "huggingface"},
)

MODEL = "inclusionAI/Ling-2.6-1T:novita"

app = Server()


def _format_messages(messages: list, system_prompt: str = "") -> list[dict]:
    """Build the chat payload: optional system prompt first, then each turn.

    Shared by `chat` and `stream_chat` (previously duplicated in both).

    Args:
        messages: conversation turns; each item is assumed to be a dict with
            "role" and "content" keys — raises KeyError otherwise.
        system_prompt: prepended as a system message when non-blank.

    Returns:
        A list of {"role", "content"} dicts in API order.
    """
    formatted: list[dict] = []
    prompt = system_prompt.strip()
    if prompt:
        formatted.append({"role": "system", "content": prompt})
    for msg in messages:
        formatted.append({"role": msg["role"], "content": msg["content"]})
    return formatted


@app.api()
def chat(messages: list, system_prompt: str = "") -> str:
    """Send a conversation to Ling-2.6-1T and return the assistant reply."""
    formatted = _format_messages(messages, system_prompt)
    completion = client.chat.completions.create(model=MODEL, messages=formatted)
    # Defensive: the router can return an empty choices list.
    if not completion.choices:
        return "Error: No response from model."
    return completion.choices[0].message.content


@app.post("/stream_chat")
async def stream_chat(request: Request):
    """Stream the model reply token-by-token as Server-Sent Events.

    Request body (JSON): {"messages": [...], "system_prompt": "..."}.
    Emits `data: {"token": ...}` events, `data: {"error": ...}` on failure,
    and always terminates with `data: [DONE]`.
    """
    body = await request.json()
    formatted = _format_messages(
        body.get("messages", []), body.get("system_prompt", "")
    )

    def event_stream():
        try:
            stream = client.chat.completions.create(
                model=MODEL, messages=formatted, stream=True
            )
            for chunk in stream:
                # Some keep-alive chunks carry no choices; skip them.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                if delta.content:
                    yield f"data: {json.dumps({'token': delta.content})}\n\n"
        except Exception as e:
            # Surface the failure to the client instead of dropping the stream.
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
        # Sentinel so the client knows the stream is complete either way.
        yield "data: [DONE]\n\n"

    return StreamingResponse(event_stream(), media_type="text/event-stream")


@app.get("/")
async def homepage():
    """Serve the static index.html that lives next to this script."""
    html_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "index.html")
    with open(html_path, "r", encoding="utf-8") as f:
        return HTMLResponse(f.read())


app.launch(show_error=True)