author    maor-ps <154728172+maor-ps@users.noreply.github.com>  2024-05-04 12:06:40 +0300
committer GitHub <noreply@github.com>                           2024-05-04 11:06:40 +0200
commit    03fb8a002df2e96104f9e06de9c78d2a8ed91e92 (patch)
tree      1795330f5b96b4cecd41fff6358bbaf792603f60 /examples/server/server.cpp
parent    92139b90af4841d7fd060b526bdd443b621770ff (diff)
If the first token generated by the server is the stop word, the server will crash (#7038)
This request reproduces the issue with llama13b: { 'prompt': 'Q: hello world \nA: ', 'stop': ['\n'], 'temperature': 0.0, 'n_predict': 10, 'cache_prompt': True, 'n_probs': 10 }
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r--  examples/server/server.cpp | 3
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index f60530cf..ff0814b2 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1383,9 +1383,10 @@ struct server_context {
 if (!slot.params.stream && slot.stopped_word) {
     const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
+    size_t safe_offset = std::min(slot.generated_token_probs.size(), stop_word_toks.size());
     probs = std::vector<completion_token_output>(
         slot.generated_token_probs.begin(),
-        slot.generated_token_probs.end() - stop_word_toks.size());
+        slot.generated_token_probs.end() - safe_offset);
 } else {
     probs = std::vector<completion_token_output>(
         slot.generated_token_probs.begin(),
         slot.generated_token_probs.end());
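
Why the clamp is needed: when the stop word is the very first thing the model emits, slot.generated_token_probs can hold fewer entries than the tokenized stop word, so end() - stop_word_toks.size() moves the iterator before begin(), which is undefined behavior and the observed crash. Below is a minimal standalone sketch of the failure mode and the std::min clamp applied by this commit; it is not the server code, and it substitutes plain int values (made up here) for completion_token_output entries.

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    int main() {
        // The server produced probabilities for only one token, but the
        // stop word tokenized to two tokens (values are illustrative).
        std::vector<int> generated_token_probs = {42};
        std::vector<int> stop_word_toks        = {13, 198};

        // Pre-fix behavior: end() - 2 on a one-element vector steps the
        // iterator before begin(), which is undefined behavior (the crash):
        //   std::vector<int> probs(generated_token_probs.begin(),
        //                          generated_token_probs.end() - stop_word_toks.size());

        // Post-fix behavior: clamp the offset so it never exceeds the
        // number of generated entries.
        std::size_t safe_offset = std::min(generated_token_probs.size(),
                                           stop_word_toks.size());
        std::vector<int> probs(generated_token_probs.begin(),
                               generated_token_probs.end() - safe_offset);

        std::printf("probs.size() = %zu\n", probs.size()); // prints 0; no crash
        return 0;
    }

With the clamp in place, a stop word that appears as the first generated token simply yields an empty probs vector instead of out-of-range iterator arithmetic.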