From 8669c3db2b98f05775292778dd05f424ee0cd250 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Mon, 12 May 2025 07:47:46 +0300 Subject: GPU offload policy (#405) * Adding GPU offload policy * Minor --------- Co-authored-by: Iwan Kawrakow --- common/common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'common/common.h') diff --git a/common/common.h b/common/common.h index b4f75236..fd83c9d3 100644 --- a/common/common.h +++ b/common/common.h @@ -143,6 +143,7 @@ struct gpt_params { std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts) std::vector<llama_model_kv_override> kv_overrides; std::vector<llama_model_tensor_buft_override> tensor_buft_overrides; + std::vector<std::pair<int,int>> offload_policy; bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_lora_adapter_apply) std::vector<llama_lora_adapter_info> lora_adapters; // lora adapter path with user defined scale -- cgit v1.2.3