Spaces:

ResearchEngineering
/

AGI

Sleeping

App Files Files Community

Dmitry Beresnev committed on Mar 19

Commit

470e737

1 Parent(s): 2c31416

Fix: error format wrapping now applies to /v1/chat/completions, and the final stream chunk now includes generation stats

Browse files

Files changed (1) hide show

cpp/server.cpp +17 -2

cpp/server.cpp CHANGED Viewed

@@ -207,7 +207,8 @@ static std::string build_buffered_stream_response(const std::string &completion_
         });
     }
-    oss << build_sse_event({
         {"id", id},
         {"object", "chat.completion.chunk"},
         {"created", created},
@@ -219,7 +220,10 @@ static std::string build_buffered_stream_response(const std::string &completion_
                 {"finish_reason", "stop"}
             }
         })}
-    });
     oss << "data: [DONE]\n\n";
     return oss.str();
 }
@@ -256,6 +260,17 @@ http::response<http::string_body> handle_request(
     auto json_response = [&](http::status status, const json &obj) {
         json payload = obj;
         payload["request_id"] = request_id;
         http::response<http::string_body> res{status, req.version()};
         res.set(http::field::content_type, "application/json");
         res.set(http::field::server, "llm-manager");

         });
     }
+    // Final chunk: include usage and timings so the web UI can display generation stats
+    json final_chunk = {
         {"id", id},
         {"object", "chat.completion.chunk"},
         {"created", created},
                 {"finish_reason", "stop"}
             }
         })}
+    };
+    if (completion.contains("usage"))   final_chunk["usage"]   = completion["usage"];
+    if (completion.contains("timings")) final_chunk["timings"] = completion["timings"];
+    oss << build_sse_event(final_chunk);
     oss << "data: [DONE]\n\n";
     return oss.str();
 }
     auto json_response = [&](http::status status, const json &obj) {
         json payload = obj;
         payload["request_id"] = request_id;
+        // llama.cpp web UI expects {"error":{"code":N,"message":"..."}} for both
+        // /v1/chat/completions and /completion. Our generic errors use {"error":"string"}
+        // — rewrap so the popup shows the text.
+        if ((path == "/v1/chat/completions" || path == "/completion") &&
+            payload.contains("error") && payload["error"].is_string()) {
+            payload["error"] = {
+                {"code",    static_cast<int>(status)},
+                {"message", payload["error"].get<std::string>()},
+                {"type",    "server_error"}
+            };
+        }
         http::response<http::string_body> res{status, req.version()};
         res.set(http::field::content_type, "application/json");
         res.set(http::field::server, "llm-manager");