summaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
authorJohannes Gäßler <johannesg@5d6.de>2023-06-18 17:41:26 +0200
committerGitHub <noreply@github.com>2023-06-18 17:41:26 +0200
commitb24c3049d96557c24782e4d32feaae65f47277af (patch)
tree0d2223baa728c6bba59c57b8e926e4125bcd3c37 /llama.cpp
parent0ede372a51fd8160688e01b587582666c14e94e5 (diff)
Added tokens per second to info prints (#1928)
Diffstat (limited to 'llama.cpp')
-rw-r--r--llama.cpp9
1 files changed, 6 insertions, 3 deletions
diff --git a/llama.cpp b/llama.cpp
index 45360cea..2105e327 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3467,9 +3467,12 @@ void llama_print_timings(struct llama_context * ctx) {
fprintf(stderr, "\n");
fprintf(stderr, "%s: load time = %8.2f ms\n", __func__, ctx->t_load_us / 1000.0);
- fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample);
- fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval);
- fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token)\n", __func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval);
+ fprintf(stderr, "%s: sample time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+ __func__, 1e-3 * ctx->t_sample_us, n_sample, 1e-3 * ctx->t_sample_us / n_sample, 1e6 / ctx->t_sample_us * n_sample);
+ fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
+ __func__, 1e-3 * ctx->t_p_eval_us, n_p_eval, 1e-3 * ctx->t_p_eval_us / n_p_eval, 1e6 / ctx->t_p_eval_us * n_p_eval);
+ fprintf(stderr, "%s: eval time = %8.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n",
+ __func__, 1e-3 * ctx->t_eval_us, n_eval, 1e-3 * ctx->t_eval_us / n_eval, 1e6 / ctx->t_eval_us * n_eval);
fprintf(stderr, "%s: total time = %8.2f ms\n", __func__, (t_end_us - ctx->t_start_us)/1000.0);
}