diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-01-13 19:31:26 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-13 19:31:26 +0200 |
commit | 0ea069b87bd296c556824e57455433b6c0357340 (patch) | |
tree | baf7e00e69a3061926e4aa1800d2ea1c7f238626 /examples/server | |
parent | f172de03f11465dc6c5a0fc3a22f8ec254c6832c (diff) |
server : fix prompt caching with system prompt (#4914)
Diffstat (limited to 'examples/server')
-rw-r--r-- | examples/server/server.cpp | 18 |
1 files changed, 14 insertions, 4 deletions
```diff
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 79eacf82..93f99929 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1180,8 +1180,9 @@ struct llama_server_context
         return slot.images.size() > 0;
     }
 
-    void send_error(task_server& task, std::string error)
+    void send_error(task_server& task, const std::string &error)
     {
+        LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
         std::unique_lock<std::mutex> lock(mutex_results);
         task_result res;
         res.id = task.id;
@@ -1570,12 +1571,22 @@ struct llama_server_context
                     LOG_TEE("slot unavailable\n");
                     // send error result
                     send_error(task, "slot unavailable");
-                    return;
+                    break;
                 }
 
                 if (task.data.contains("system_prompt"))
                 {
+                    if (!all_slots_are_idle) {
+                        send_error(task, "system prompt can only be updated when all slots are idle");
+                        break;
+                    }
                     process_system_prompt_data(task.data["system_prompt"]);
+
+                    // reset cache_tokens for all slots
+                    for (llama_client_slot &slot : slots)
+                    {
+                        slot.cache_tokens.clear();
+                    }
                 }
 
                 slot->reset();
@@ -1652,8 +1663,7 @@ struct llama_server_context
         // attend tasks
         process_tasks();
 
-        // update the system prompt wait until all slots are idle state
-        if (system_need_update && all_slots_are_idle)
+        if (system_need_update)
         {
             LOG_TEE("updating system prompt\n");
             update_system_prompt();
```