summary refs log tree commit diff
path: root/examples/server
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2024-01-04 19:56:33 +0200
committer: GitHub <noreply@github.com> 2024-01-04 19:56:33 +0200
commit: 012cf349aec8ffb47c9def5dc018240fa3721e8b (patch)
tree: 08334edbf63ae07e19624bb7da7b43b1dbeabd88 /examples/server
parent: a91928014fcf51fe297823fcff0788de4f14206e (diff)
server : send token probs for "stream == false" (#4714)
Diffstat (limited to 'examples/server')
-rw-r--r--  examples/server/server.cpp  4
1 file changed, 2 insertions, 2 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index e45ea809..d1469fb0 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1265,7 +1265,7 @@ struct llama_server_context
{
std::vector<completion_token_output> probs_output = {};
const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
- size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
+ size_t probs_pos = std::min(slot.sent_token_probs_index, slot.generated_token_probs.size());
size_t probs_stop_pos = std::min(slot.sent_token_probs_index + to_send_toks.size(), slot.generated_token_probs.size());
if (probs_pos < probs_stop_pos)
{
@@ -1325,7 +1325,7 @@ struct llama_server_context
{
probs = std::vector<completion_token_output>(
slot.generated_token_probs.begin(),
- slot.generated_token_probs.begin() + slot.sent_token_probs_index);
+ slot.generated_token_probs.end());
}
res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
}