author    Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>  2024-03-11 17:09:32 +0900
committer GitHub <noreply@github.com>  2024-03-11 10:09:32 +0200
commit    332bdfd7980718abf664bfa5460f2288a3314984 (patch)
tree      4f6cc623350ef0b1fad735fc2b843a11bc88e381 /examples/server/utils.hpp
parent    ecab1c75de68de7c41c254e2ae170d3b07bee6d4 (diff)
server : maintain chat completion id for streaming responses (#5988)
* server: maintain chat completion id for streaming responses

* Update examples/server/utils.hpp

* Update examples/server/utils.hpp

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
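In the OpenAI chat completions API, every chunk of one streamed response carries the same "id" field, which is how clients correlate the pieces of a single completion. Before this commit, the server called gen_chatcmplid() once per chunk, handing out a different id each time; the fix is to mint the id once and reuse it. A minimal, self-contained sketch of the fixed behavior (the random-id helper below is an illustrative stand-in, not the server's actual implementation):

#include <iostream>
#include <random>
#include <string>
#include <vector>

// Illustrative stand-in for the server's gen_chatcmplid() helper:
// a random "chatcmpl-..." identifier in the style of the OpenAI API.
static std::string gen_chatcmplid() {
    static const char alphanum[] =
        "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    std::mt19937 rng{std::random_device{}()};
    std::uniform_int_distribution<int> dist(0, sizeof(alphanum) - 2);
    std::string id = "chatcmpl-";
    for (int i = 0; i < 30; ++i) {
        id += alphanum[dist(rng)];
    }
    return id;
}

int main() {
    // Generated once, before any chunk is emitted...
    const std::string completion_id = gen_chatcmplid();

    // ...and reused for every chunk of the streamed response. Before this
    // commit, each chunk called gen_chatcmplid() and carried a different id.
    const std::vector<std::string> deltas = {"Hello", ", ", "world"};
    for (const std::string & content : deltas) {
        std::cout << "{\"id\":\"" << completion_id
                  << "\",\"object\":\"chat.completion.chunk\","
                  << "\"choices\":[{\"delta\":{\"content\":\"" << content << "\"}}]}\n";
    }
    return 0;
}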
Diffstat (limited to 'examples/server/utils.hpp')
-rw-r--r--  examples/server/utils.hpp  12
1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index df0a2778..f27af81e 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -378,7 +378,7 @@ static json oaicompat_completion_params_parse(
     return llama_params;
 }
 
-static json format_final_response_oaicompat(const json & request, json result, bool streaming = false) {
+static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
     bool stopped_word = result.count("stopped_word") != 0;
     bool stopped_eos = json_value(result, "stopped_eos", false);
     int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
@@ -412,7 +412,7 @@ static json format_final_response_oaicompat(const json & request, json result, bool streaming = false) {
             {"prompt_tokens", num_prompt_tokens},
             {"total_tokens", num_tokens_predicted + num_prompt_tokens}
         }},
-        {"id", gen_chatcmplid()}
+        {"id", completion_id}
     };
 
     if (server_verbose) {
@@ -427,7 +427,7 @@ static json format_final_response_oaicompat(const json & request, json result, bool streaming = false) {
 }
 
 // return value is vector as there is one case where we might need to generate two responses
-static std::vector<json> format_partial_response_oaicompat(json result) {
+static std::vector<json> format_partial_response_oaicompat(json result, const std::string & completion_id) {
     if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
         return std::vector<json>({result});
     }
@@ -471,7 +471,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
                     {"role", "assistant"}
                 }}}})},
             {"created", t},
-            {"id", gen_chatcmplid()},
+            {"id", completion_id},
             {"model", modelname},
             {"object", "chat.completion.chunk"}};
@@ -482,7 +482,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
                     {"content", content}}}
                 }})},
             {"created", t},
-            {"id", gen_chatcmplid()},
+            {"id", completion_id},
             {"model", modelname},
             {"object", "chat.completion.chunk"}};
@@ -509,7 +509,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
     json ret = json {
         {"choices", choices},
         {"created", t},
-        {"id", gen_chatcmplid()},
+        {"id", completion_id},
         {"model", modelname},
         {"object", "chat.completion.chunk"}
     };
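These signature changes imply that the caller now mints the id once per request and threads it through both formatters. A hedged sketch of the expected call pattern (the real call sites live in examples/server/server.cpp, outside this diff; run_completion, stream_completion, and send are hypothetical placeholders):

// Hypothetical caller; assumes json, gen_chatcmplid(), and the two
// formatters declared in examples/server/utils.hpp.
void handle_chat_completion(const json & request, bool stream) {
    const std::string completion_id = gen_chatcmplid();  // minted once per request

    if (!stream) {
        json result = run_completion(request);  // hypothetical helper
        send(format_final_response_oaicompat(request, result, completion_id));
    } else {
        for (const json & partial : stream_completion(request)) {  // hypothetical helper
            // may yield two chunks (role delta + content); all share completion_id
            for (const json & chunk : format_partial_response_oaicompat(partial, completion_id)) {
                send(chunk);  // hypothetical transport
            }
        }
    }
}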