From 8669c3db2b98f05775292778dd05f424ee0cd250 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Mon, 12 May 2025 07:47:46 +0300
Subject: GPU offload policy (#405)

* Adding GPU offload policy

* Minor

---------

Co-authored-by: Iwan Kawrakow
---
 include/llama.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/llama.h')

diff --git a/include/llama.h b/include/llama.h
index e2901861..f1511548 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -408,6 +408,7 @@ extern "C" {
         // currently works only with CPU execution
         ggml_abort_callback abort_callback;
         void * abort_callback_data;
+        void * offload_policy;
     };
 
     // model quantization parameters
@@ -523,6 +524,8 @@ extern "C" {
             struct llama_model * model,
             struct llama_context_params params);
 
+    LLAMA_API void llama_set_offload_policy(struct llama_context * lctx, int op, bool on_or_off);
+
     // Frees all allocated memory
     LLAMA_API void llama_free(struct llama_context * ctx);
 
-- 
cgit v1.2.3