author     Pierrick Hymbert <pierrick.hymbert@gmail.com>    2024-04-11 14:51:07 +0200
committer  GitHub <noreply@github.com>                      2024-04-11 14:51:07 +0200
commit     b804b1ef77351d2a11be945462c6c251710476cb (patch)
tree       f963c03b90a54083ee67c22c882d20e388820897 /examples/imatrix/imatrix.cpp
parent     8228b66dbc16290c5cbd70e80ab47c068e2569d8 (diff)
eval-callback: Example how to use eval callback for debugging (#6576)
* gguf-debug: Example how to use ggml callback for debugging
* gguf-debug: no mutex, verify type, fix stride.
* llama: cv eval: move cb eval field in common gpt_params
* ggml_debug: use common gpt_params to pass cb eval. Fix get tensor SIGV random.
* ggml_debug: ci: add tests
* ggml_debug: EOL in CMakeLists.txt
* ggml_debug: Remove unused param n_batch, no batching here
* ggml_debug: fix trailing spaces
* ggml_debug: fix trailing spaces
* common: fix cb_eval and user data not initialized
* ci: build revert label
* ggml_debug: add main test label
* doc: add a model: add a link to ggml-debug
* ggml-debug: add to make toolchain
* ggml-debug: tests add the main label
* ggml-debug: ci add test curl label
* common: allow the warmup to be disabled in llama_init_from_gpt_params
* ci: add curl test
* ggml-debug: better tensor type support
* gitignore : ggml-debug
* ggml-debug: printing also the sum of each tensor
* ggml-debug: remove block size
* eval-callback: renamed from ggml-debug
* eval-callback: fix make toolchain

---------

Co-authored-by: slaren <slarengh@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
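For background on the mechanism this commit wires into imatrix: the callback assigned in the hunk below must match the ggml_backend_sched_eval_callback signature from ggml-backend.h. The scheduler calls it twice per graph node, first with ask == true to query interest, then with ask == false once the node has been computed. A minimal hypothetical sketch (the real collector here is ik_collect_imatrix, and a fuller example lives in examples/eval-callback):

    #include <cstdio>
    #include "ggml.h"

    // hypothetical observer callback; prints metadata for every computed node
    static bool my_eval_cb(struct ggml_tensor * t, bool ask, void * user_data) {
        (void) user_data; // unused in this sketch
        if (ask) {
            return true; // yes, we want to observe this node
        }
        // the node has been computed; its metadata can be inspected here
        fprintf(stderr, "%s: op=%s type=%s ne=[%lld, %lld]\n",
                t->name, ggml_op_name(t->op), ggml_type_name(t->type),
                (long long) t->ne[0], (long long) t->ne[1]);
        return true; // returning false aborts the graph computation
    }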
Diffstat (limited to 'examples/imatrix/imatrix.cpp')
-rw-r--r--  examples/imatrix/imatrix.cpp | 26
1 file changed, 10 insertions(+), 16 deletions(-)
diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 1bf55f90..ff624c53 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -597,24 +597,18 @@ int main(int argc, char ** argv) {
     llama_backend_init();
     llama_numa_init(params.numa);
 
-    llama_model_params mparams = llama_model_params_from_gpt_params(params);
-
-    llama_model * model = llama_load_model_from_file(params.model.c_str(), mparams);
-    if (model == NULL) {
-        fprintf(stderr, "%s: error: unable to load model\n", __func__);
-        return 1;
-    }
-
-    llama_context_params cparams = llama_context_params_from_gpt_params(params);
-
     // pass the callback to the backend scheduler
     // it will be executed for each node during the graph computation
-    cparams.cb_eval = ik_collect_imatrix;
-    cparams.cb_eval_user_data = NULL;
-
-    llama_context * ctx = llama_new_context_with_model(model, cparams);
-    if (ctx == NULL) {
-        fprintf(stderr, "%s: error: unable to create context\n", __func__);
+    params.cb_eval = ik_collect_imatrix;
+    params.cb_eval_user_data = NULL;
+    params.warmup = false;
+
+    // init
+    llama_model * model;
+    llama_context * ctx;
+    std::tie(model, ctx) = llama_init_from_gpt_params(params);
+    if (model == nullptr || ctx == nullptr) {
+        fprintf(stderr, "%s : failed to init\n", __func__);
         return 1;
     }
 
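As a usage note, the warmup change matters here: llama_init_from_gpt_params runs a short warmup decode by default, and with a callback installed those warmup tokens would flow into the collected statistics, which is why this PR adds a params.warmup flag to disable it. A condensed sketch of the resulting init pattern, assuming common.h at this revision (init_with_callback is a hypothetical helper, not part of the tree):

    #include <tuple>
    #include "common.h"
    #include "llama.h"

    // hypothetical helper condensing the pattern from the hunk above
    static int init_with_callback(gpt_params & params,
                                  ggml_backend_sched_eval_callback cb) {
        params.cb_eval           = cb;    // executed for each graph node
        params.cb_eval_user_data = NULL;
        params.warmup            = false; // keep warmup tokens out of the stats

        llama_model   * model;
        llama_context * ctx;
        std::tie(model, ctx) = llama_init_from_gpt_params(params);
        if (model == nullptr || ctx == nullptr) {
            return 1;
        }

        // ... evaluate the calibration data here ...

        llama_free(ctx);
        llama_free_model(model);
        return 0;
    }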