author | Pierrick Hymbert <pierrick.hymbert@gmail.com> | 2024-03-09 10:30:04 +0100
committer | GitHub <noreply@github.com> | 2024-03-09 11:30:04 +0200
commit | fd72d2d2a5e79d61ccef6af3d15f16e5e5cbc352 (patch)
tree | 3a8912adc90c34470fa1e3acb6dd861990159ec7 /examples/server/server.cpp
parent | c2101a2e909ac7c08976d414e64e96c90ee5fa9e (diff)
server: tests: add truncated prompt tests, better kv cache size (#5933)
* server: tests: add truncated prompt tests, better size
* server, tests : update regex
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- | examples/server/server.cpp | 23
1 file changed, 19 insertions, 4 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 59a59d56..6f444998 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1128,6 +1128,7 @@ struct server_context {
         LOG_VERBOSE("stopped by limit", {
             {"id_slot", slot.id},
+            {"id_task", slot.id_task},
             {"n_decoded", slot.n_decoded},
             {"n_predict", slot.params.n_predict},
         });
@@ -1141,6 +1142,8 @@ struct server_context {
         }

         LOG_VERBOSE("next token", {
+            {"id_slot", slot.id},
+            {"id_task", slot.id_task},
             {"token", result.tok},
             {"token_text", tokens_to_output_formatted_string(ctx, result.tok)},
             {"has_next_token", slot.has_next_token},
@@ -1750,6 +1753,15 @@ struct server_context {
             slot.n_past = 0;
             slot.n_prompt_tokens = prompt_tokens.size();

+            LOG_VERBOSE("prompt tokenized", {
+                {"id_slot", slot.id},
+                {"id_task", slot.id_task},
+                {"n_ctx", slot.n_ctx},
+                {"n_keep", slot.params.n_keep},
+                {"n_prompt_tokens", slot.n_prompt_tokens},
+                {"prompt_tokens", tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())},
+            });
+
             if (slot.embedding) {
                 // this prompt is too large to process - discard it
                 if (slot.n_prompt_tokens > n_batch) {
@@ -1788,10 +1800,13 @@ struct server_context {
                 slot.n_prompt_tokens = prompt_tokens.size();

                 LOG_VERBOSE("input truncated", {
-                    {"n_ctx", slot.n_ctx},
-                    {"n_keep", slot.params.n_keep},
-                    {"n_left", n_left},
-                    {"prompt_tokens", tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())},
+                    {"id_slot", slot.id},
+                    {"id_task", slot.id_task},
+                    {"n_ctx", slot.n_ctx},
+                    {"n_keep", slot.params.n_keep},
+                    {"n_left", n_left},
+                    {"n_prompt_tokens", slot.n_prompt_tokens},
+                    {"prompt_tokens", tokens_to_str(ctx, prompt_tokens.cbegin(), prompt_tokens.cend())},
                 });

                 GGML_ASSERT(slot.n_prompt_tokens < slot.n_ctx);
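For context, the n_ctx / n_keep / n_left / n_prompt_tokens fields added to the "input truncated" log entry describe the server's prompt-truncation scheme: keep the first n_keep tokens of the prompt, then drop whole blocks of size n_left/2 from the middle so that the remaining tokens fit in the slot's context window and the GGML_ASSERT above holds. The snippet below is a minimal, self-contained C++ sketch of that scheme under those assumptions; the truncate_prompt helper and the token alias are illustrative stand-ins, not part of server.cpp.

#include <cstdio>
#include <vector>

// Illustrative stand-in for llama_token (the real type lives in llama.h).
using token = int;

// Simplified sketch of the truncation scheme: keep the first n_keep tokens,
// then erase whole blocks of size n_left/2 from the middle of the prompt so
// that the kept prefix plus the most recent tokens fit inside n_ctx.
static std::vector<token> truncate_prompt(const std::vector<token> & prompt, int n_ctx, int n_keep) {
    const int n_prompt_tokens = (int) prompt.size();
    if (n_prompt_tokens < n_ctx) {
        return prompt; // already fits, nothing to do
    }

    const int n_left        = n_ctx - n_keep; // budget left after the kept prefix
    const int n_block_size  = n_left / 2;     // granularity of the cut (assumed > 0 here)
    const int erased_blocks = (n_prompt_tokens - n_keep - n_block_size) / n_block_size;

    // Kept prefix ...
    std::vector<token> out(prompt.begin(), prompt.begin() + n_keep);
    // ... followed by everything after the erased middle blocks.
    out.insert(out.end(), prompt.begin() + n_keep + erased_blocks * n_block_size, prompt.end());
    return out;
}

int main() {
    std::vector<token> prompt(300);
    for (int i = 0; i < (int) prompt.size(); ++i) prompt[i] = i;

    const auto truncated = truncate_prompt(prompt, /*n_ctx=*/128, /*n_keep=*/16);

    // Mirrors the fields reported by the "input truncated" log entry.
    std::printf("n_ctx=%d n_keep=%d n_prompt_tokens=%zu -> %zu tokens after truncation\n",
                128, 16, prompt.size(), truncated.size());
    return 0;
}

With the example values (300 prompt tokens, n_ctx = 128, n_keep = 16), four blocks of 56 tokens are erased and 76 tokens remain, which satisfies the strict n_prompt_tokens < n_ctx check asserted after truncation.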