From 332bdfd7980718abf664bfa5460f2288a3314984 Mon Sep 17 00:00:00 2001
From: Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
Date: Mon, 11 Mar 2024 17:09:32 +0900
Subject: server : maintain chat completion id for streaming responses (#5988)

* server: maintain chat completion id for streaming responses

* Update examples/server/utils.hpp

* Update examples/server/utils.hpp

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 examples/server/utils.hpp | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'examples/server/utils.hpp')
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index df0a2778..f27af81e 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -378,7 +378,7 @@ static json oaicompat_completion_params_parse(
     return llama_params;
 }
 
-static json format_final_response_oaicompat(const json & request, json result, bool streaming = false) {
+static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
     bool stopped_word        = result.count("stopped_word") != 0;
     bool stopped_eos         = json_value(result, "stopped_eos", false);
     int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
@@ -412,7 +412,7 @@ static json format_final_response_oaicompat(const json & request, json result, b
             {"prompt_tokens",     num_prompt_tokens},
             {"total_tokens",      num_tokens_predicted + num_prompt_tokens}
         }},
-        {"id", gen_chatcmplid()}
+        {"id", completion_id}
     };
 
     if (server_verbose) {
@@ -427,7 +427,7 @@ static json format_final_response_oaicompat(const json & request, json result, b
 }
 
 // return value is vector as there is one case where we might need to generate two responses
-static std::vector<json> format_partial_response_oaicompat(json result) {
+static std::vector<json> format_partial_response_oaicompat(json result, const std::string & completion_id) {
     if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
         return std::vector<json>({result});
     }
@@ -471,7 +471,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
                                             {"role", "assistant"}
                                         }}}})},
                             {"created", t},
-                            {"id", gen_chatcmplid()},
+                            {"id", completion_id},
                             {"model", modelname},
                             {"object", "chat.completion.chunk"}};
 
@@ -482,7 +482,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
                                                             {"content", content}}}
                                                             }})},
                             {"created", t},
-                            {"id", gen_chatcmplid()},
+                            {"id", completion_id},
                             {"model", modelname},
                             {"object", "chat.completion.chunk"}};
 
@@ -509,7 +509,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
     json ret = json {
         {"choices", choices},
         {"created", t},
-        {"id",      gen_chatcmplid()},
+        {"id",      completion_id},
         {"model",   modelname},
         {"object",  "chat.completion.chunk"}
     };
-- 
cgit v1.2.3