From 31cfbb1013a482e89c72146e2063ac4362becae7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20L=C3=BCtke?=
Date: Wed, 5 Jul 2023 16:51:13 -0400
Subject: Expose generation timings from server & update completions.js (#2116)

* use javascript generators as much cleaner API

  Also add ways to access completion as promise and EventSource

* export llama_timings as struct and expose them in server

* update readme, update baked includes

* llama : uniform variable names + struct init

---------

Co-authored-by: Georgi Gerganov
---
 llama.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'llama.h')

diff --git a/llama.h b/llama.h
index 5bb1964b..c1e7dab9 100644
--- a/llama.h
+++ b/llama.h
@@ -134,6 +134,20 @@ extern "C" {
         bool quantize_output_tensor; // quantize output.weight
     } llama_model_quantize_params;
 
+    // performance timing information
+    struct llama_timings {
+        double t_start_ms;
+        double t_end_ms;
+        double t_load_ms;
+        double t_sample_ms;
+        double t_p_eval_ms;
+        double t_eval_ms;
+
+        int32_t n_sample;
+        int32_t n_p_eval;
+        int32_t n_eval;
+    };
+
     LLAMA_API struct llama_context_params llama_context_default_params();
     LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params();
 
@@ -331,6 +345,7 @@ extern "C" {
     LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates);
 
     // Performance information
+    LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
     LLAMA_API void llama_print_timings(struct llama_context * ctx);
     LLAMA_API void llama_reset_timings(struct llama_context * ctx);
 
-- 
cgit v1.2.3
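
For context, the new llama_get_timings() returns the llama_timings struct above by value,
so a caller can compute throughput without parsing llama_print_timings() output. A minimal
sketch of one possible caller follows; the helper name report_throughput and the assumption
that ctx has already processed a prompt and sampled tokens are illustrative, not part of
this patch:

    #include <stdio.h>
    #include "llama.h"

    /* Illustrative helper: assumes `ctx` is a valid llama_context that has
     * already evaluated a prompt and sampled some tokens. */
    static void report_throughput(struct llama_context * ctx) {
        const struct llama_timings timings = llama_get_timings(ctx);

        /* n_p_eval / t_p_eval_ms cover prompt processing;
         * n_eval / t_eval_ms cover token generation */
        const double prompt_tps = timings.n_p_eval > 0
            ? 1e3 * timings.n_p_eval / timings.t_p_eval_ms : 0.0;
        const double eval_tps = timings.n_eval > 0
            ? 1e3 * timings.n_eval / timings.t_eval_ms : 0.0;

        printf("prompt: %5d tokens, %8.2f tokens/s\n", timings.n_p_eval, prompt_tps);
        printf("eval:   %5d tokens, %8.2f tokens/s\n", timings.n_eval,   eval_tps);

        llama_reset_timings(ctx); /* optional: start a fresh measurement window */
    }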