Dmitry Beresnev committed on
Commit
470e737
·
1 Parent(s): 2c31416

Fix: error-format wrapping now applies to /v1/chat/completions, and the final stream chunk now includes generation stats (usage, timings)

Browse files
Files changed (1) hide show
  1. cpp/server.cpp +17 -2
cpp/server.cpp CHANGED
@@ -207,7 +207,8 @@ static std::string build_buffered_stream_response(const std::string &completion_
207
  });
208
  }
209
 
210
- oss << build_sse_event({
 
211
  {"id", id},
212
  {"object", "chat.completion.chunk"},
213
  {"created", created},
@@ -219,7 +220,10 @@ static std::string build_buffered_stream_response(const std::string &completion_
219
  {"finish_reason", "stop"}
220
  }
221
  })}
222
- });
 
 
 
223
  oss << "data: [DONE]\n\n";
224
  return oss.str();
225
  }
@@ -256,6 +260,17 @@ http::response<http::string_body> handle_request(
256
  auto json_response = [&](http::status status, const json &obj) {
257
  json payload = obj;
258
  payload["request_id"] = request_id;
 
 
 
 
 
 
 
 
 
 
 
259
  http::response<http::string_body> res{status, req.version()};
260
  res.set(http::field::content_type, "application/json");
261
  res.set(http::field::server, "llm-manager");
 
207
  });
208
  }
209
 
210
+ // Final chunk: include usage and timings so the web UI can display generation stats
211
+ json final_chunk = {
212
  {"id", id},
213
  {"object", "chat.completion.chunk"},
214
  {"created", created},
 
220
  {"finish_reason", "stop"}
221
  }
222
  })}
223
+ };
224
+ if (completion.contains("usage")) final_chunk["usage"] = completion["usage"];
225
+ if (completion.contains("timings")) final_chunk["timings"] = completion["timings"];
226
+ oss << build_sse_event(final_chunk);
227
  oss << "data: [DONE]\n\n";
228
  return oss.str();
229
  }
 
260
  auto json_response = [&](http::status status, const json &obj) {
261
  json payload = obj;
262
  payload["request_id"] = request_id;
263
+ // llama.cpp web UI expects {"error":{"code":N,"message":"..."}} for both
264
+ // /v1/chat/completions and /completion. Our generic errors use {"error":"string"}
265
+ // — rewrap so the popup shows the text.
266
+ if ((path == "/v1/chat/completions" || path == "/completion") &&
267
+ payload.contains("error") && payload["error"].is_string()) {
268
+ payload["error"] = {
269
+ {"code", static_cast<int>(status)},
270
+ {"message", payload["error"].get<std::string>()},
271
+ {"type", "server_error"}
272
+ };
273
+ }
274
  http::response<http::string_body> res{status, req.version()};
275
  res.set(http::field::content_type, "application/json");
276
  res.set(http::field::server, "llm-manager");