summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pierrick Hymbert <pierrick.hymbert@gmail.com> 2024-04-12 13:49:21 +0200
committer: GitHub <noreply@github.com> 2024-04-12 14:49:21 +0300
commit: 24ee66ed0d908d156bd0d1747b63a636a495cd7a (patch)
tree: a9d51fb2231c0b9a040440b62e282513265b0415
parent: 91c736015b66ba1d0b82cbae6313b6d5eaa61b68 (diff)
server : coherent log output for KV cache full (#6637)
-rw-r--r-- examples/server/server.cpp 22
1 files changed, 17 insertions, 5 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2e791190..b08a09a5 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1083,7 +1083,7 @@ struct server_context {
};
if (llama_decode(ctx, batch_view) != 0) {
- LOG_TEE("%s: llama_decode() failed\n", __func__);
+ LOG_ERROR("llama_decode() failed", {});
return;
}
}
@@ -1281,7 +1281,11 @@ struct server_context {
}
void send_error(const int id_task, const int id_multi, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
- LOG_TEE("task %i - error: %s\n", id_task, error.c_str());
+ LOG_ERROR("task error", {
+ {"id_multi", id_multi},
+ {"id_task", id_task},
+ {"error", error},
+ });
server_task_result res;
res.id = id_task;
@@ -2186,7 +2190,11 @@ struct server_context {
if (ret != 0) {
if (n_batch == 1 || ret < 0) {
// if you get here, it means the KV cache is full - try increasing it via the context size
- LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
+ LOG_ERROR("failed to decode the batch: KV cache is full - try increasing it via the context size", {
+ {"i", i},
+ {"n_batch", n_batch}, // log the current batch size here; the decode return code is reported separately under "ret"
+ {"ret", ret},
+ });
for (auto & slot : slots) {
slot.state = SLOT_STATE_PROCESSING;
slot.command = SLOT_COMMAND_NONE;
@@ -2196,12 +2204,16 @@ struct server_context {
break; // break loop of n_batch
}
- LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
-
// retry with half the batch size to try to find a free slot in the KV cache
n_batch /= 2;
i -= n_batch;
+ LOG_WARNING("failed to find free space in the KV cache, retrying with smaller batch size - try increasing it via the context size or enable defragmentation", {
+ {"i", i},
+ {"n_batch", n_batch},
+ {"ret", ret},
+ });
+
continue; // continue loop of n_batch
}