# Ling-2.6-1T / app.py
# Last commit by akhaliq (HF Staff), 7f44f98:
#   feat: implement error handling and status messaging for streaming chat completions
import os
import json
from openai import OpenAI
from gradio import Server
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi import Request
# Chat-completion client pointed at the Hugging Face router endpoint.
# The token comes from the HF_TOKEN environment variable (empty when unset).
client = OpenAI(
    api_key=os.getenv("HF_TOKEN", ""),
    base_url="https://router.huggingface.co/v1",
    default_headers={"X-HF-Bill-To": "huggingface"},
)

# Model identifier passed to every chat-completion request.
MODEL = "inclusionAI/Ling-2.6-1T:novita"

# Server instance on which the API and HTTP routes are registered.
app = Server()
@app.api()
def chat(messages: list, system_prompt: str = "") -> str:
    """Send a conversation to Ling-2.6-1T and return the assistant reply.

    Args:
        messages: Conversation turns; each item must provide ``"role"`` and
            ``"content"`` keys. Any other keys are dropped.
        system_prompt: Optional system instruction. It is stripped and, when
            non-empty, sent as the first message.

    Returns:
        The text of the first completion choice, ``""`` when that choice
        carries no text, or an error string when the model returns no
        choices.
    """
    system_text = system_prompt.strip()
    formatted: list[dict] = (
        [{"role": "system", "content": system_text}] if system_text else []
    )
    # Forward only the two fields the completion API needs from each turn.
    formatted.extend(
        {"role": msg["role"], "content": msg["content"]} for msg in messages
    )

    completion = client.chat.completions.create(model=MODEL, messages=formatted)
    if not completion.choices:
        return "Error: No response from model."
    # ``message.content`` is nullable in the API response; fall back to an
    # empty string so the declared ``str`` return type always holds.
    return completion.choices[0].message.content or ""
@app.post("/stream_chat")
async def stream_chat(request: Request):
    """Stream a chat completion to the caller as server-sent events.

    The JSON request body may contain ``messages`` (a list of objects that
    each provide ``role`` and ``content``) and ``system_prompt`` (a string).
    Missing or ``null`` values are treated as empty.

    Each generated piece of text is emitted as ``data: {"token": ...}``. Any
    exception raised while creating or reading the completion is emitted as
    ``data: {"error": ...}``. The stream always ends with ``data: [DONE]``.

    Returns:
        A ``StreamingResponse`` with media type ``text/event-stream``.
    """
    body = await request.json()
    # ``dict.get(key, default)`` still returns None for an explicit JSON null,
    # so fall back with ``or`` instead of crashing on ``.strip()`` / iteration.
    messages: list[dict] = body.get("messages") or []
    system_prompt: str = body.get("system_prompt") or ""

    formatted: list[dict] = []
    system_text = system_prompt.strip()
    if system_text:
        formatted.append({"role": "system", "content": system_text})
    # Forward only the two fields the completion API needs from each turn.
    formatted.extend(
        {"role": msg["role"], "content": msg["content"]} for msg in messages
    )

    def event_stream():
        """Yield SSE frames for the completion, then the terminating marker."""
        try:
            # The context manager closes the upstream response even when
            # iteration stops early (e.g. this generator is closed mid-stream).
            with client.chat.completions.create(
                model=MODEL, messages=formatted, stream=True
            ) as stream:
                for chunk in stream:
                    # Some chunks carry no choices; skip them.
                    if not chunk.choices:
                        continue
                    delta = chunk.choices[0].delta
                    if delta.content:
                        yield f"data: {json.dumps({'token': delta.content})}\n\n"
        except Exception as e:
            # Report the failure in-band as an SSE event so the client can
            # show it instead of seeing the stream end silently.
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(event_stream(), media_type="text/event-stream")
@app.get("/")
async def homepage():
    """Serve ``index.html`` from this module's directory as an HTML response."""
    module_dir = os.path.dirname(os.path.abspath(__file__))
    index_file = os.path.join(module_dir, "index.html")
    with open(index_file, "r", encoding="utf-8") as handle:
        markup = handle.read()
    return HTMLResponse(markup)
# Launch the app when this module is executed, with show_error enabled.
app.launch(show_error=True)