qa1145 committed on
Commit
e6c065b
·
verified ·
1 Parent(s): 9188527

Upload 9 files

Browse files
Files changed (1) hide show
  1. app.py +64 -1
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from fastapi import FastAPI, Request, HTTPException
3
- from fastapi.responses import JSONResponse
4
  from pydantic import BaseModel
5
  from typing import List, Optional
6
  import asyncio
@@ -8,6 +8,7 @@ import random
8
  from datetime import datetime
9
  import threading
10
  import uvicorn
 
11
 
12
  from src.config import get_api_keys
13
  from src.model_tester import ModelTester
@@ -70,6 +71,12 @@ async def chat_completions(request: ChatCompletionRequest):
70
  prompt = request.messages[-1].content if request.messages else ""
71
  model_hint = request.model
72
 
 
 
 
 
 
 
73
  result = await model_tester.chat_completion(prompt, model_hint)
74
 
75
  if not result.get("success"):
@@ -104,6 +111,62 @@ async def chat_completions(request: ChatCompletionRequest):
104
  }
105
 
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
@fastapi_app.get("/health")
async def health():
    """Liveness probe — reports the service as up."""
    payload = {"status": "ok"}
    return payload
 
1
  import gradio as gr
2
  from fastapi import FastAPI, Request, HTTPException
3
+ from fastapi.responses import JSONResponse, StreamingResponse
4
  from pydantic import BaseModel
5
  from typing import List, Optional
6
  import asyncio
 
8
  from datetime import datetime
9
  import threading
10
  import uvicorn
11
+ import json
12
 
13
  from src.config import get_api_keys
14
  from src.model_tester import ModelTester
 
71
  prompt = request.messages[-1].content if request.messages else ""
72
  model_hint = request.model
73
 
74
+ if request.stream:
75
+ return StreamingResponse(
76
+ stream_chat(request.model, prompt, request.messages),
77
+ media_type="text/event-stream"
78
+ )
79
+
80
  result = await model_tester.chat_completion(prompt, model_hint)
81
 
82
  if not result.get("success"):
 
111
  }
112
 
113
 
114
async def stream_chat(model_hint: Optional[str], prompt: str, messages: list):
    """Stream a chat completion as OpenAI-style Server-Sent Events.

    Performs the (non-streaming) ``model_tester.chat_completion`` call,
    then re-emits the response content one character at a time as
    ``chat.completion.chunk`` SSE frames, followed by a final
    ``finish_reason: "stop"`` chunk and the ``data: [DONE]`` terminator.

    Args:
        model_hint: Requested model name; falsy values collapse to "".
        prompt: Text of the last user message.
        messages: Full message list (currently unused here; kept for
            signature parity with the non-streaming path).

    Yields:
        str: ``data: ...\n\n`` SSE frames.
    """
    model_hint = model_hint or ""

    result = await model_tester.chat_completion(prompt, model_hint)

    if not result.get("success"):
        # Serialize through json.dumps so quotes/newlines in the upstream
        # error text cannot break the JSON frame (the previous f-string
        # interpolation emitted invalid JSON for such messages).
        error_payload = {"error": result.get("error", "Request failed")}
        yield f"data: {json.dumps(error_payload)}\n\n"
        yield "data: [DONE]\n\n"
        return

    response_data = result.get("response", {})
    content = ""
    if "choices" in response_data and response_data["choices"]:
        content = response_data["choices"][0].get("message", {}).get("content", "")

    model_id = result.get("model", model_hint or "unknown")
    completion_id = f"chatcmpl-{random.randint(100000, 999999)}"
    created = int(datetime.now().timestamp())

    # Stream the content one character per chunk.
    for char in content:
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model_id,
            "choices": [
                {
                    "index": 0,
                    "delta": {"content": char},
                    "finish_reason": None,
                }
            ],
        }
        yield f"data: {json.dumps(chunk)}\n\n"

    # Final chunk signals completion, then the SSE done marker.
    final_chunk = {
        "id": completion_id,
        "object": "chat.completion.chunk",
        "created": created,
        "model": model_id,
        "choices": [
            {
                "index": 0,
                "delta": {},
                "finish_reason": "stop",
            }
        ],
    }
    yield f"data: {json.dumps(final_chunk)}\n\n"
    yield "data: [DONE]\n\n"
168
+
169
+
170
@fastapi_app.get("/health")
async def health():
    """Liveness probe — reports the service as up."""
    payload = {"status": "ok"}
    return payload