summary | refs | log | tree | commit | diff
path: root/examples/server/server.cpp
diff options
context:
space:
mode:
author    Pierrick Hymbert <pierrick.hymbert@gmail.com>  2024-03-09 10:30:04 +0100
committer GitHub <noreply@github.com>                    2024-03-09 11:30:04 +0200
commit    fd72d2d2a5e79d61ccef6af3d15f16e5e5cbc352 (patch)
tree      3a8912adc90c34470fa1e3acb6dd861990159ec7 /examples/server/server.cpp
parent    c2101a2e909ac7c08976d414e64e96c90ee5fa9e (diff)
server: tests: add truncated prompt tests, better kv cache size (#5933)
* server: tests: add truncated prompt tests, better size

* server, tests : update regex

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r--  examples/server/server.cpp  23
1 file changed, 19 insertions, 4 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 59a59d56..6f444998 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1128,6 +1128,7 @@ struct server_context {
LOG_VERBOSE("stopped by limit", {
{"id_slot", slot.id},
+ {"id_task", slot.id_task},
{"n_decoded", slot.n_decoded},
{"n_predict", slot.params.n_predict},
});
@@ -1141,6 +1142,8 @@ struct server_context {
}
LOG_VERBOSE("next token", {
+ {"id_slot", slot.id},
+ {"id_task", slot.id_task},
{"token", result.tok},
{"token_text", tokens_to_output_formatted_string(ctx, result.tok)},
{"has_next_token", slot.has_next_token},
@@ -1750,6 +1753,15 @@ struct server_context {
slot.n_past = 0;
slot.n_prompt_tokens = prompt_tokens.size();
+ LOG_VERBOSE("prompt tokenized", {
+ {"id_slot", slot.id},
+ {"id_task", slot.id_task},
+ {"n_ctx", slot.n_ctx},
+ {"n_keep", slot.params.n_keep},
+ {"n_prompt_tokens", slot.n_prompt_tokens},
+ {"prompt_tokens", tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())},
+ });
+
if (slot.embedding) {
// this prompt is too large to process - discard it
if (slot.n_prompt_tokens > n_batch) {
@@ -1788,10 +1800,13 @@ struct server_context {
slot.n_prompt_tokens = prompt_tokens.size();
LOG_VERBOSE("input truncated", {
- {"n_ctx", slot.n_ctx},
- {"n_keep", slot.params.n_keep},
- {"n_left", n_left},
- {"prompt_tokens", tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())},
+ {"id_slot", slot.id},
+ {"id_task", slot.id_task},
+ {"n_ctx", slot.n_ctx},
+ {"n_keep", slot.params.n_keep},
+ {"n_left", n_left},
+ {"n_prompt_tokens", slot.n_prompt_tokens},
+ {"prompt_tokens", tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())},
});
GGML_ASSERT(slot.n_prompt_tokens < slot.n_ctx);