diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-03-09 12:34:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-09 12:34:18 +0200 |
commit | 9674aaf35cb81478eb38c3f3ebde713ec72fbb79 (patch) | |
tree | bdf6dc4025f5c65a5f232b951b3c29b451a2f498 /examples/server/server.cpp | |
parent | 950ba1ab84db199f0bbdecdb2bb911f35261b321 (diff) |
server : simplify logic for empty prompts (#5953)
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- | examples/server/server.cpp | 28 |
1 file changed, 15 insertions, 13 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 6e0f8328..aedf0afc 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1704,19 +1704,6 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
-                const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty());
-
-                // empty prompt passed -> release the slot and send empty response
-                // note: infill mode allows empty prompt
-                if (slot.state == SLOT_STATE_IDLE && slot.command == SLOT_COMMAND_LOAD_PROMPT && !has_prompt && !slot.infill) {
-                    slot.state = SLOT_STATE_PROCESSING;
-                    slot.command = SLOT_COMMAND_NONE;
-                    slot.release();
-                    slot.print_timings();
-                    send_final_response(slot);
-                    continue;
-                }
-
                 // this slot still has a prompt to be processed
                 if (slot.state == SLOT_STATE_IDLE && slot.command == SLOT_COMMAND_LOAD_PROMPT) {
                     auto & prompt_tokens = slot.prompt_tokens;
@@ -1768,6 +1755,21 @@ struct server_context {
                         {"prompt_tokens", tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())},
                     });
 
+                    // empty prompt passed -> release the slot and send empty response
+                    if (prompt_tokens.empty()) {
+                        LOG_INFO("empty prompt - releasing slot", {
+                            {"id_slot", slot.id},
+                            {"id_task", slot.id_task}
+                        });
+
+                        slot.state = SLOT_STATE_PROCESSING;
+                        slot.command = SLOT_COMMAND_NONE;
+                        slot.release();
+                        slot.print_timings();
+                        send_final_response(slot);
+                        continue;
+                    }
+
                     if (slot.embedding) {
                         // this prompt is too large to process - discard it
                         if (slot.n_prompt_tokens > n_batch) {