| author    | maor-ps <154728172+maor-ps@users.noreply.github.com> | 2024-05-04 12:06:40 +0300 |
|-----------|------------------------------------------------------|---------------------------|
| committer | GitHub <noreply@github.com>                          | 2024-05-04 11:06:40 +0200 |
| commit    | 03fb8a002df2e96104f9e06de9c78d2a8ed91e92 (patch)     | |
| tree      | 1795330f5b96b4cecd41fff6358bbaf792603f60 /examples/server | |
| parent    | 92139b90af4841d7fd060b526bdd443b621770ff (diff)      | |
If the first token generated by the server is the stop word, the server will crash (#7038)
This request reproduces the issue with llama-13b:
{
  "prompt": "Q: hello world \nA: ",
  "stop": ["\n"],
  "temperature": 0.0,
  "n_predict": 10,
  "cache_prompt": true,
  "n_probs": 10
}
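With this request the stop word "\n" can match on the very first generated token, so slot.generated_token_probs may hold fewer entries than the tokenized stop word; subtracting stop_word_toks.size() from generated_token_probs.end() then moves the iterator before begin(), which is undefined behavior and crashes the server.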
Diffstat (limited to 'examples/server')
-rw-r--r-- | examples/server/server.cpp | 3 |
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index f60530cf..ff0814b2 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1383,9 +1383,10 @@ struct server_context {
             if (!slot.params.stream && slot.stopped_word) {
                 const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);

+                size_t safe_offset = std::min(slot.generated_token_probs.size(), stop_word_toks.size());
                 probs = std::vector<completion_token_output>(
                         slot.generated_token_probs.begin(),
-                        slot.generated_token_probs.end() - stop_word_toks.size());
+                        slot.generated_token_probs.end() - safe_offset);
             } else {
                 probs = std::vector<completion_token_output>(
                         slot.generated_token_probs.begin(),
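Below is a minimal standalone sketch of the same arithmetic. The token values and the std::vector<int> stand-ins are illustrative assumptions, not the server's actual completion_token_output data; only the begin()/end() offset logic mirrors the patched code.

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    // Hypothetical stand-ins: the stop word tokenizes to two tokens,
    // but only one token was generated before the stop word matched.
    std::vector<int> generated_token_probs = {13};
    std::vector<int> stop_word_toks        = {13, 198};

    // Pre-fix: generated_token_probs.end() - stop_word_toks.size()
    // points before begin() here, which is undefined behavior and is
    // what crashed the server.
    //
    // Post-fix: clamp the offset so it never exceeds what was generated.
    size_t safe_offset = std::min(generated_token_probs.size(),
                                  stop_word_toks.size());
    std::vector<int> probs(generated_token_probs.begin(),
                           generated_token_probs.end() - safe_offset);

    std::printf("kept %zu of %zu generated token(s)\n",
                probs.size(), generated_token_probs.size());
    return 0;
}

Clamping with std::min leaves the common case (more generated tokens than stop-word tokens) unchanged, while the degenerate first-token case now yields an empty probs vector instead of undefined behavior.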