author     Georgi Gerganov <ggerganov@gmail.com>    2024-01-11 23:23:49 +0200
committer  GitHub <noreply@github.com>              2024-01-11 23:23:49 +0200
commit     1d118386fea031f01550f8cd47a5c86296e5333f (patch)
tree       7a2a0995cb1791b0c590b0ff09754f5ac3c1f477
parent     7edefbd79cc6dea96640edc54c6b94b2b2496d8b (diff)
server : fix infill when prompt is empty (#4833)
-rw-r--r--  examples/server/server.cpp  15
1 file changed, 8 insertions(+), 7 deletions(-)
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 031824e1..1d30a15a 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1406,7 +1406,7 @@ struct llama_server_context
task.multitask_id = multitask_id;
// when a completion task's prompt array is not a singleton, we split it into multiple requests
- if (task.data.at("prompt").size() > 1)
+ if (task.data.count("prompt") && task.data.at("prompt").size() > 1)
{
lock.unlock(); // entering new func scope
return split_multiprompt_task(task);
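The guard above is the core of the fix: an infill request posts input_prefix/input_suffix rather than a "prompt" field, so the old unconditional task.data.at("prompt") threw on the missing key. Below is a minimal standalone sketch of the same check using nlohmann::json (which server.cpp already uses); should_split() is a hypothetical helper for illustration, not a function from server.cpp.

    #include <iostream>
    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    // returns true only when "prompt" exists and is an array with more than one element
    static bool should_split(const json & data) {
        return data.count("prompt") && data.at("prompt").size() > 1;
    }

    int main() {
        json infill  = {{"input_prefix", "int add(int a, int b) {"}, {"input_suffix", "}"}}; // no "prompt" key
        json multi   = {{"prompt", json::array({"first prompt", "second prompt"})}};
        json single_ = {{"prompt", "hello"}};

        std::cout << should_split(infill)  << '\n'; // 0 -- missing key no longer reaches at(), so no json::out_of_range
        std::cout << should_split(multi)   << '\n'; // 1 -- multi-prompt array is split into sub-tasks
        std::cout << should_split(single_) << '\n'; // 0 -- a string prompt has size() == 1
    }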
@@ -1577,9 +1577,9 @@ struct llama_server_context
slot->reset();
- slot->infill    = task.infill_mode;
- slot->embedding = task.embedding_mode;
- slot->task_id   = task.id;
+ slot->infill       = task.infill_mode;
+ slot->embedding    = task.embedding_mode;
+ slot->task_id      = task.id;
slot->multitask_id = task.multitask_id;
if (!launch_slot_with_data(slot, task.data))
@@ -1731,7 +1731,8 @@ struct llama_server_context
const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()) || !slot.images.empty();
// empty prompt passed -> release the slot and send empty response
- if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt)
+ // note: infill mode allows empty prompt
+ if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill)
{
slot.release();
slot.print_timings();
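The hunk above is the behavioral half of the fix: an empty prompt normally releases the slot and returns an empty response, but infill builds its context from prefix/suffix, so an empty prompt must be allowed through. A minimal sketch of the new condition follows, assuming a stripped-down stand-in for the slot; IDLE and LOAD_PROMPT mirror enum names in server.cpp, while fake_slot and should_release() are hypothetical.

    #include <iostream>

    // IDLE / LOAD_PROMPT mirror the slot_state / slot_command names in server.cpp;
    // fake_slot and should_release() are illustration-only stand-ins
    enum slot_state   { IDLE, PROCESSING };
    enum slot_command { NONE, LOAD_PROMPT, RELEASE };

    struct fake_slot {
        slot_state   state   = IDLE;
        slot_command command = LOAD_PROMPT;
        bool         infill  = false;
    };

    // true when the server should drop the request and send an empty response
    static bool should_release(const fake_slot & slot, bool has_prompt) {
        return slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt && !slot.infill;
    }

    int main() {
        fake_slot completion_slot;    // regular completion with an empty prompt
        fake_slot infill_slot;
        infill_slot.infill = true;    // infill request with an empty prompt

        std::cout << should_release(completion_slot, /*has_prompt=*/false) << '\n'; // 1 -> released
        std::cout << should_release(infill_slot,     /*has_prompt=*/false) << '\n'; // 0 -> kept (the fix)
    }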
@@ -2609,8 +2610,8 @@ static json format_final_response_oaicompat(const json &request, const task_resu
{"object", streaming ? "chat.completion.chunk" : "chat.completion"},
{"usage",
json{{"completion_tokens", num_tokens_predicted},
- {"prompt_tokens", num_prompt_tokens},
- {"total_tokens", num_tokens_predicted + num_prompt_tokens}}},
+ {"prompt_tokens", num_prompt_tokens},
+ {"total_tokens", num_tokens_predicted + num_prompt_tokens}}},
{"id", gen_chatcmplid()}};
if (server_verbose) {