From 8669c3db2b98f05775292778dd05f424ee0cd250 Mon Sep 17 00:00:00 2001
From: Kawrakow <iwankawrakow@gmail.com>
Date: Mon, 12 May 2025 07:47:46 +0300
Subject: GPU offload policy (#405)

* Adding GPU offload policy

* Minor

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
---
 include/llama.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/llama.h')

diff --git a/include/llama.h b/include/llama.h
index e2901861..f1511548 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -408,6 +408,7 @@ extern "C" {
         // currently works only with CPU execution
         ggml_abort_callback abort_callback;
         void *              abort_callback_data;
+        void *              offload_policy;
     };
 
     // model quantization parameters
@@ -523,6 +524,8 @@ extern "C" {
                      struct llama_model * model,
             struct llama_context_params   params);
 
+    LLAMA_API void llama_set_offload_policy(struct llama_context * lctx, int op, bool on_or_off);
+
     // Frees all allocated memory
     LLAMA_API void llama_free(struct llama_context * ctx);
 
-- 
cgit v1.2.3