summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorKawrakow <iwankawrakow@gmail.com>2025-05-12 07:47:46 +0300
committerGitHub <noreply@github.com>2025-05-12 07:47:46 +0300
commit8669c3db2b98f05775292778dd05f424ee0cd250 (patch)
treeed5c6a41e81ecd6b6620b748bfd765997663eb4c /include
parent504fb890d90ec27e5f4822b7bd772fa94d4d6aac (diff)
GPU offload policy (#405)
* Adding GPU offload policy

* Minor

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'include')
-rw-r--r--include/llama.h3
1 file changed, 3 insertions(+), 0 deletions(-)
diff --git a/include/llama.h b/include/llama.h
index e2901861..f1511548 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -408,6 +408,7 @@ extern "C" {
// currently works only with CPU execution
ggml_abort_callback abort_callback;
void * abort_callback_data;
+ void * offload_policy;
};
// model quantization parameters
@@ -523,6 +524,8 @@ extern "C" {
struct llama_model * model,
struct llama_context_params params);
+ LLAMA_API void llama_set_offload_policy(struct llama_context * lctx, int op, bool on_or_off);
+
// Frees all allocated memory
LLAMA_API void llama_free(struct llama_context * ctx);