From b804b1ef77351d2a11be945462c6c251710476cb Mon Sep 17 00:00:00 2001 From: Pierrick Hymbert Date: Thu, 11 Apr 2024 14:51:07 +0200 Subject: eval-callback: Example how to use eval callback for debugging (#6576) * gguf-debug: Example how to use ggml callback for debugging * gguf-debug: no mutex, verify type, fix stride. * llama: cb eval: move cb eval field in common gpt_params * ggml_debug: use common gpt_params to pass cb eval. Fix get tensor SIGSEGV random. * ggml_debug: ci: add tests * ggml_debug: EOL in CMakeLists.txt * ggml_debug: Remove unused param n_batch, no batching here * ggml_debug: fix trailing spaces * ggml_debug: fix trailing spaces * common: fix cb_eval and user data not initialized * ci: build revert label * ggml_debug: add main test label * doc: add a model: add a link to ggml-debug * ggml-debug: add to make toolchain * ggml-debug: tests add the main label * ggml-debug: ci add test curl label * common: allow the warmup to be disabled in llama_init_from_gpt_params * ci: add curl test * ggml-debug: better tensor type support * gitignore : ggml-debug * ggml-debug: printing also the sum of each tensor * ggml-debug: remove block size * eval-callback: renamed from ggml-debug * eval-callback: fix make toolchain --------- Co-authored-by: slaren Co-authored-by: Georgi Gerganov --- common/common.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'common/common.cpp') diff --git a/common/common.cpp b/common/common.cpp index 98fc8388..dda51478 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1745,6 +1745,8 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param cparams.yarn_orig_ctx = params.yarn_orig_ctx; cparams.pooling_type = params.pooling_type; cparams.defrag_thold = params.defrag_thold; + cparams.cb_eval = params.cb_eval; + cparams.cb_eval_user_data = params.cb_eval_user_data; cparams.offload_kqv = !params.no_kv_offload; cparams.type_k = kv_cache_type_from_str(params.cache_type_k); @@ -2192,7 +2194,7 
@@ std::tuple llama_init_from_gpt_par params.sparams.logit_bias[llama_token_eos(model)] = -INFINITY; } - { + if (params.warmup) { LOG("warming up the model with an empty run\n"); std::vector tmp = { llama_token_bos(model), llama_token_eos(model), }; -- cgit v1.2.3