summary | refs | log | tree | commit | diff
path: root/examples/server/server.cpp
diff options
context:
space:
mode:
author: Jhen-Jie Hong <iainst0409@gmail.com> 2023-10-05 09:02:55 -0500
committer: GitHub <noreply@github.com> 2023-10-05 17:02:55 +0300
commit: e8b8d32e8663ffc55a02c9721af3a5190382cbb0 (patch)
tree: 41b946d7eb17ea46167eb3a3a6885dbf3777e752 /examples/server/server.cpp
parent: 8f3a642ec1d878b2d0a0d15e3a4277f522790d4c (diff)
server : fix incorrect num_tokens_predicted (#3480)
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- examples/server/server.cpp | 8
1 files changed, 5 insertions, 3 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 921eb5da..6e31e133 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -504,9 +504,11 @@ struct llama_server_context
});
}
+ bool tg = true;
while (n_past < embd.size())
{
int n_eval = (int)embd.size() - n_past;
+ tg = n_eval == 1;
if (n_eval > params.n_batch)
{
n_eval = params.n_batch;
@@ -633,7 +635,9 @@ struct llama_server_context
last_n_tokens.erase(last_n_tokens.begin());
last_n_tokens.push_back(result.tok);
- num_tokens_predicted++;
+ if (tg) {
+ num_tokens_predicted++;
+ }
}
// add it to the context
@@ -1124,8 +1128,6 @@ static json format_timings(llama_server_context &llama)
{
const auto timings = llama_get_timings(llama.ctx);
- assert(timings.n_eval == ptrdiff_t(llama.num_tokens_predicted));
-
return json{
{"prompt_n", timings.n_p_eval},
{"prompt_ms", timings.t_p_eval_ms},