diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2025-05-12 07:47:46 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-12 07:47:46 +0300 |
commit | 8669c3db2b98f05775292778dd05f424ee0cd250 (patch) | |
tree | ed5c6a41e81ecd6b6620b748bfd765997663eb4c /common/common.h | |
parent | 504fb890d90ec27e5f4822b7bd772fa94d4d6aac (diff) |
GPU offload policy (#405)
* Adding GPU offload policy
* Minor
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'common/common.h')
-rw-r--r-- | common/common.h | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/common/common.h b/common/common.h
index b4f75236..fd83c9d3 100644
--- a/common/common.h
+++ b/common/common.h
@@ -143,6 +143,7 @@ struct gpt_params {
     std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
     std::vector<llama_model_kv_override> kv_overrides;
     std::vector<llama_model_tensor_buft_override> tensor_buft_overrides;
+    std::vector<std::pair<int,int>> offload_policy;
     bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_lora_adapter_apply)
     std::vector<llama_lora_adapter_info> lora_adapters; // lora adapter path with user defined scale