Spaces:
Sleeping
Sleeping
Dmitry Beresnev committed on
Commit ·
470e737
1
Parent(s): 2c31416
fix: error format wrapping now applies to /v1/chat/completions, and the final stream chunk now includes generation stats (usage and timings)
Browse files: cpp/server.cpp +17 -2
cpp/server.cpp
CHANGED
|
@@ -207,7 +207,8 @@ static std::string build_buffered_stream_response(const std::string &completion_
|
|
| 207 |
});
|
| 208 |
}
|
| 209 |
|
| 210 |
-
|
|
|
|
| 211 |
{"id", id},
|
| 212 |
{"object", "chat.completion.chunk"},
|
| 213 |
{"created", created},
|
|
@@ -219,7 +220,10 @@ static std::string build_buffered_stream_response(const std::string &completion_
|
|
| 219 |
{"finish_reason", "stop"}
|
| 220 |
}
|
| 221 |
})}
|
| 222 |
-
}
|
|
|
|
|
|
|
|
|
|
| 223 |
oss << "data: [DONE]\n\n";
|
| 224 |
return oss.str();
|
| 225 |
}
|
|
@@ -256,6 +260,17 @@ http::response<http::string_body> handle_request(
|
|
| 256 |
auto json_response = [&](http::status status, const json &obj) {
|
| 257 |
json payload = obj;
|
| 258 |
payload["request_id"] = request_id;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
http::response<http::string_body> res{status, req.version()};
|
| 260 |
res.set(http::field::content_type, "application/json");
|
| 261 |
res.set(http::field::server, "llm-manager");
|
|
|
|
| 207 |
});
|
| 208 |
}
|
| 209 |
|
| 210 |
+
// Final chunk: include usage and timings so the web UI can display generation stats
|
| 211 |
+
json final_chunk = {
|
| 212 |
{"id", id},
|
| 213 |
{"object", "chat.completion.chunk"},
|
| 214 |
{"created", created},
|
|
|
|
| 220 |
{"finish_reason", "stop"}
|
| 221 |
}
|
| 222 |
})}
|
| 223 |
+
};
|
| 224 |
+
if (completion.contains("usage")) final_chunk["usage"] = completion["usage"];
|
| 225 |
+
if (completion.contains("timings")) final_chunk["timings"] = completion["timings"];
|
| 226 |
+
oss << build_sse_event(final_chunk);
|
| 227 |
oss << "data: [DONE]\n\n";
|
| 228 |
return oss.str();
|
| 229 |
}
|
|
|
|
| 260 |
auto json_response = [&](http::status status, const json &obj) {
|
| 261 |
json payload = obj;
|
| 262 |
payload["request_id"] = request_id;
|
| 263 |
+
// llama.cpp web UI expects {"error":{"code":N,"message":"..."}} for both
|
| 264 |
+
// /v1/chat/completions and /completion. Our generic errors use {"error":"string"}
|
| 265 |
+
// — rewrap so the popup shows the text.
|
| 266 |
+
if ((path == "/v1/chat/completions" || path == "/completion") &&
|
| 267 |
+
payload.contains("error") && payload["error"].is_string()) {
|
| 268 |
+
payload["error"] = {
|
| 269 |
+
{"code", static_cast<int>(status)},
|
| 270 |
+
{"message", payload["error"].get<std::string>()},
|
| 271 |
+
{"type", "server_error"}
|
| 272 |
+
};
|
| 273 |
+
}
|
| 274 |
http::response<http::string_body> res{status, req.version()};
|
| 275 |
res.set(http::field::content_type, "application/json");
|
| 276 |
res.set(http::field::server, "llm-manager");
|