author     Pierrick Hymbert <pierrick.hymbert@gmail.com>  2024-04-11 14:51:07 +0200
committer  GitHub <noreply@github.com>                    2024-04-11 14:51:07 +0200
commit     b804b1ef77351d2a11be945462c6c251710476cb (patch)
tree       f963c03b90a54083ee67c22c882d20e388820897 /examples/imatrix
parent     8228b66dbc16290c5cbd70e80ab47c068e2569d8 (diff)
eval-callback: Example of how to use the eval callback for debugging (#6576)
* gguf-debug: Example of how to use the ggml callback for debugging
* gguf-debug: no mutex, verify type, fix stride.
* llama: cb eval: move the cb eval field into common gpt_params
* ggml_debug: use common gpt_params to pass cb eval.
Fix random SIGSEGV in get tensor.
* ggml_debug: ci: add tests
* ggml_debug: EOL in CMakeLists.txt
* ggml_debug: Remove unused param n_batch, no batching here
* ggml_debug: fix trailing spaces
* common: fix cb_eval and user data not initialized
* ci: build revert label
* ggml_debug: add main test label
* doc: add a model: add a link to ggml-debug
* ggml-debug: add to make toolchain
* ggml-debug: tests add the main label
* ggml-debug: ci add test curl label
* common: allow the warmup to be disabled in llama_init_from_gpt_params
* ci: add curl test
* ggml-debug: better tensor type support
* gitignore : ggml-debug
* ggml-debug: also print the sum of each tensor
* ggml-debug: remove block size
* eval-callback: renamed from ggml-debug
* eval-callback: fix make toolchain
---------
Co-authored-by: slaren <slarengh@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
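For context on the mechanism this commit wires up: a scheduler eval callback has the ggml_backend_sched_eval_callback shape and is consulted for each node during graph computation, first with ask == true so the observer can opt in to a node, then, for accepted nodes, with ask == false once the node's data has been computed. Below is a minimal sketch in the style of ik_collect_imatrix; the name my_eval_cb and the GGML_OP_MUL_MAT filter are illustrative, not part of this commit.

#include <cstdio>
#include "ggml.h"

// Two-pass protocol of ggml_backend_sched_eval_callback:
//   pass 1 (ask == true):  return true for nodes we want to observe
//   pass 2 (ask == false): the node's data is computed and readable
static bool my_eval_cb(struct ggml_tensor * t, bool ask, void * user_data) {
    (void) user_data; // unused in this sketch

    if (ask) {
        // observe only matrix multiplications
        return t->op == GGML_OP_MUL_MAT;
    }

    // inspect the computed node: here, just log its name and shape
    fprintf(stderr, "observed %s [%lld, %lld]\n",
            t->name, (long long) t->ne[0], (long long) t->ne[1]);

    return true; // returning false stops the graph computation early
}

As the diff below shows, such a callback is now passed through common's gpt_params (params.cb_eval / params.cb_eval_user_data) instead of being set on llama_context_params directly.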
Diffstat (limited to 'examples/imatrix')
-rw-r--r--  examples/imatrix/imatrix.cpp  26
1 file changed, 10 insertions(+), 16 deletions(-)
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 1bf55f90..ff624c53 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -597,24 +597,18 @@ int main(int argc, char ** argv) {
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    llama_model_params mparams = llama_model_params_from_gpt_params(params);
-
-    llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
-    if (model == NULL) {
-        fprintf(stderr, "%s: error: unable to load model\n", __func__);
-        return 1;
-    }
-
-    llama_context_params cparams = llama_context_params_from_gpt_params(params);
-
     // pass the callback to the backend scheduler
     // it will be executed for each node during the graph computation
-    cparams.cb_eval = ik_collect_imatrix;
-    cparams.cb_eval_user_data = NULL;
-
-    llama_context * ctx = llama_new_context_with_model(model, cparams);
-    if (ctx == NULL) {
-        fprintf(stderr, "%s: error: unable to create context\n", __func__);
+    params.cb_eval = ik_collect_imatrix;
+    params.cb_eval_user_data = NULL;
+    params.warmup = false;
+
+    // init
+    llama_model * model;
+    llama_context * ctx;
+    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    if (model == nullptr || ctx == nullptr) {
+        fprintf(stderr, "%s : failed to init\n", __func__);
         return 1;
     }
 
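Note the added params.warmup = false: llama_init_from_gpt_params runs a short warmup decode by default, which would now also be routed through ik_collect_imatrix and could presumably skew the collected importance-matrix statistics with activations from dummy tokens. The "common: allow the warmup to be disabled in llama_init_from_gpt_params" change in this commit exists to support exactly this call site.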