Diffstat (limited to 'examples/server/utils.hpp')
 examples/server/utils.hpp | 170 --------------------------------------------
 1 file changed, 0 insertions(+), 170 deletions(-)
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index e6a1f069..70be0748 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -411,177 +411,7 @@ static json oaicompat_completion_params_parse(
return llama_params;
}
-static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
- bool stopped_word = result.count("stopped_word") != 0;
- bool stopped_eos = json_value(result, "stopped_eos", false);
- int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
- int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
- std::string content = json_value(result, "content", std::string(""));
-
- std::string finish_reason = "length";
- if (stopped_word || stopped_eos) {
- finish_reason = "stop";
- }
-
- json choices =
- streaming ? json::array({json{{"finish_reason", finish_reason},
- {"index", 0},
- {"delta", json::object()}}})
- : json::array({json{{"finish_reason", finish_reason},
- {"index", 0},
- {"message", json{{"content", content},
- {"role", "assistant"}}}}});
-
- std::time_t t = std::time(0);
-
- json res = json {
- {"choices", choices},
- {"created", t},
- {"model",
- json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
- {"object", streaming ? "chat.completion.chunk" : "chat.completion"},
- {"usage", json {
- {"completion_tokens", num_tokens_predicted},
- {"prompt_tokens", num_prompt_tokens},
- {"total_tokens", num_tokens_predicted + num_prompt_tokens}
- }},
- {"id", completion_id}
- };
-
- if (server_verbose) {
- res["__verbose"] = result;
- }
-
- if (result.contains("completion_probabilities")) {
- res["completion_probabilities"] = json_value(result, "completion_probabilities", json::array());
- }
-
- return res;
-}
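For reference, a minimal sketch of the non-streaming body this function assembles, written with the same nlohmann::json API the file uses. All field values are illustrative, and "llama-2" is a placeholder for whatever the request or DEFAULT_OAICOMPAT_MODEL supplies:

// Illustrative only: shape of the non-streaming "chat.completion" response
// built by format_final_response_oaicompat (key order is not significant).
json example = {
    {"id",      "chatcmpl-abc123"},
    {"object",  "chat.completion"},
    {"created", 1700000000},
    {"model",   "llama-2"},
    {"choices", json::array({
        {
            {"index", 0},
            {"finish_reason", "stop"},  // "length" when neither stopped_word nor stopped_eos is set
            {"message", {{"role", "assistant"}, {"content", "Hello there!"}}}
        }
    })},
    {"usage", {
        {"prompt_tokens", 10},
        {"completion_tokens", 3},
        {"total_tokens", 13}
    }}
};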
-
-// return value is a vector because in one case we may need to generate two responses
-static std::vector<json> format_partial_response_oaicompat(json result, const std::string & completion_id) {
- if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
- return std::vector<json>({result});
- }
-
- bool first = json_value(result, "oaicompat_token_ctr", 0) == 0;
- std::string modelname = json_value(result, "model", std::string(DEFAULT_OAICOMPAT_MODEL));
-
- bool stopped_word = json_value(result, "stopped_word", false);
- bool stopped_eos = json_value(result, "stopped_eos", false);
- bool stopped_limit = json_value(result, "stopped_limit", false);
- std::string content = json_value(result, "content", std::string(""));
-
- std::string finish_reason;
- if (stopped_word || stopped_eos) {
- finish_reason = "stop";
- }
- if (stopped_limit) {
- finish_reason = "length";
- }
-
- std::time_t t = std::time(0);
-
- json choices;
- if (!finish_reason.empty()) {
- choices = json::array({json{{"finish_reason", finish_reason},
- {"index", 0},
- {"delta", json::object()}}});
- } else {
- if (first) {
- if (content.empty()) {
- choices = json::array({json{{"finish_reason", nullptr},
- {"index", 0},
- {"delta", json{{"role", "assistant"}}}}});
- } else {
- // We have to send this as two updates to conform to OpenAI behavior
- json initial_ret = json{{"choices", json::array({json{
- {"finish_reason", nullptr},
- {"index", 0},
- {"delta", json{
- {"role", "assistant"}
- }}}})},
- {"created", t},
- {"id", completion_id},
- {"model", modelname},
- {"object", "chat.completion.chunk"}};
-
- json second_ret = json{
- {"choices", json::array({json{{"finish_reason", nullptr},
- {"index", 0},
- {"delta", json{
- {"content", content}}}
- }})},
- {"created", t},
- {"id", completion_id},
- {"model", modelname},
- {"object", "chat.completion.chunk"}};
-
- return std::vector<json>({initial_ret, second_ret});
- }
- } else {
- // Some idiosyncrasy in the task processing logic makes several trailing calls
- // with empty content; we ignore these at the callee site.
- if (content.empty()) {
- return std::vector<json>({json::object()});
- }
-
- choices = json::array({json{
- {"finish_reason", nullptr},
- {"index", 0},
- {"delta",
- json{
- {"content", content},
- }},
- }});
- }
- }
-
- json ret = json {
- {"choices", choices},
- {"created", t},
- {"id", completion_id},
- {"model", modelname},
- {"object", "chat.completion.chunk"}
- };
- if (!finish_reason.empty()) {
- int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
- int num_prompt_tokens = json_value(result, "tokens_evaluated", 0);
- ret.push_back({"usage", json {
- {"completion_tokens", num_tokens_predicted},
- {"prompt_tokens", num_prompt_tokens},
- {"total_tokens", num_tokens_predicted + num_prompt_tokens}
- }});
- }
-
- return std::vector<json>({ret});
-}
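For reference, a sketch of the chunk sequence in the two-update case above: the first chunk announces the assistant role, the second carries the first token's content, and the closing chunk delivers finish_reason plus usage. Values are illustrative, with "llama-2" again standing in for the model name:

// Illustrative only: the two-update streaming path, followed by the final chunk.
json role_chunk = {
    {"id",      "chatcmpl-abc123"},
    {"object",  "chat.completion.chunk"},
    {"created", 1700000000},
    {"model",   "llama-2"},
    {"choices", json::array({
        {{"index", 0}, {"finish_reason", nullptr}, {"delta", {{"role", "assistant"}}}}
    })}
};

// Second update: same envelope, but the delta now carries the content.
json content_chunk = role_chunk;
content_chunk["choices"][0]["delta"] = {{"content", "Hello"}};

// Closing chunk: empty delta, non-null finish_reason, and the usage block.
json final_chunk = role_chunk;
final_chunk["choices"][0] = {{"index", 0}, {"finish_reason", "stop"}, {"delta", json::object()}};
final_chunk["usage"] = {{"prompt_tokens", 10}, {"completion_tokens", 3}, {"total_tokens", 13}};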
-
-static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
- json data = json::array();
- int i = 0;
- for (auto & elem : embeddings) {
- data.push_back(json{
- {"embedding", json_value(elem, "embedding", json::array())},
- {"index", i++},
- {"object", "embedding"}
- });
- }
-
- json res = json {
- {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
- {"object", "list"},
- {"usage", json {
- {"prompt_tokens", 0},
- {"total_tokens", 0}
- }},
- {"data", data}
- };
-
- return res;
-}
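And a sketch of the embeddings list body; note that this function hard-codes the usage counters to zero rather than reporting real token counts. Values are illustrative:

// Illustrative only: shape of the "list" response built by
// format_embeddings_response_oaicompat, with one embedding entry.
json example = {
    {"object", "list"},
    {"model",  "llama-2"},
    {"data", json::array({
        {
            {"object", "embedding"},
            {"index", 0},
            {"embedding", json::array({0.0123, -0.0456, 0.0789})}
        }
    })},
    {"usage", {{"prompt_tokens", 0}, {"total_tokens", 0}}}  // always zero here
};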
static json format_tokenizer_response(const std::vector<llama_token> & tokens) {
return json {