File size: 2,309 Bytes
47e787a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f44f98
 
47e787a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f44f98
 
 
 
 
 
 
 
 
 
 
 
47e787a
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import json
from openai import OpenAI
from gradio import Server
from fastapi.responses import HTMLResponse, StreamingResponse
from fastapi import Request

# OpenAI-compatible client pointed at the Hugging Face inference router.
# Auth comes from the HF_TOKEN env var (empty string if unset, which will
# fail at request time, not at import time).
client = OpenAI(
    base_url="https://router.huggingface.co/v1",
    api_key=os.environ.get("HF_TOKEN", ""),
    default_headers={"X-HF-Bill-To": "huggingface"},  # attribute usage/billing to the "huggingface" org
)

# Model id with provider suffix (":novita" routes to the Novita backend).
MODEL = "inclusionAI/Ling-2.6-1T:novita"
app = Server()


@app.api()
def chat(messages: list, system_prompt: str = "") -> str:
    """Send a conversation to Ling-2.6-1T and return the assistant reply.

    Args:
        messages: Chat history; each item is a dict with "role" and
            "content" keys (extra keys are dropped before the API call).
        system_prompt: Optional system instruction; prepended as a system
            message when non-blank.

    Returns:
        The assistant's reply text; an error string when the API returns
        no choices; an empty string when the message has no text content.
    """
    formatted: list[dict] = []
    if system_prompt.strip():
        formatted.append({"role": "system", "content": system_prompt.strip()})
    # Forward only the expected keys so stray client-side fields never reach the API.
    formatted.extend(
        {"role": msg["role"], "content": msg["content"]} for msg in messages
    )
    completion = client.chat.completions.create(model=MODEL, messages=formatted)
    if not completion.choices:
        return "Error: No response from model."
    # content may be None (e.g. refusal or tool-call responses); fall back to ""
    # so the declared -> str contract holds for callers.
    return completion.choices[0].message.content or ""


@app.post("/stream_chat")
async def stream_chat(request: Request):
    """Stream the model's reply token-by-token as server-sent events.

    Expects a JSON body with optional "messages" (list of role/content
    dicts) and "system_prompt" (string) keys; emits "data:" SSE frames
    carrying either a token, an error, or the final [DONE] marker.
    """
    payload = await request.json()
    history: list[dict] = payload.get("messages", [])
    system_text: str = payload.get("system_prompt", "")

    conversation: list[dict] = []
    if system_text.strip():
        conversation.append({"role": "system", "content": system_text.strip()})
    conversation.extend(
        {"role": m["role"], "content": m["content"]} for m in history
    )

    def sse_events():
        # Errors are reported in-band as an SSE frame; [DONE] is always last.
        try:
            for chunk in client.chat.completions.create(
                model=MODEL, messages=conversation, stream=True
            ):
                if not chunk.choices:
                    continue
                piece = chunk.choices[0].delta.content
                if piece:
                    yield f"data: {json.dumps({'token': piece})}\n\n"
        except Exception as e:
            yield f"data: {json.dumps({'error': str(e)})}\n\n"
        yield "data: [DONE]\n\n"

    return StreamingResponse(sse_events(), media_type="text/event-stream")


@app.get("/")
async def homepage():
    """Serve the single-page UI (index.html) that sits next to this script."""
    app_dir = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(app_dir, "index.html"), "r", encoding="utf-8") as page:
        return HTMLResponse(page.read())


# Start serving; show_error=True surfaces exceptions to the client during development.
app.launch(show_error=True)