diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-02-27 14:35:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-27 14:35:51 +0200 |
commit | 9d533a77d0c3850ce09d736bc1baa67fd6ad27b3 (patch) | |
tree | 25adffcbb0f7c13a8578279456a4937ed73ae3f6 /common/common.h | |
parent | cbbd1efa06f8c09f9dff58ff9d9af509cc4c152b (diff) |
llama : fix defrag bugs + add parameter (#5735)
* llama : fix defrag bugs + enable by default
ggml-ci
* llama : add defrag_thold parameter
ggml-ci
* llama : cont
* llama : disable log message
ggml-ci
* llama : fix graph size check during defrag
Diffstat (limited to 'common/common.h')
-rw-r--r-- | common/common.h | 1 |
1 files changed, 1 insertions, 0 deletions
diff --git a/common/common.h b/common/common.h
index 3e21579b..25003df2 100644
--- a/common/common.h
+++ b/common/common.h
@@ -75,6 +75,7 @@ struct gpt_params {
     float yarn_beta_fast = 32.0f; // YaRN low correction dim
     float yarn_beta_slow = 1.0f; // YaRN high correction dim
     int32_t yarn_orig_ctx = 0; // YaRN original context length
+    float defrag_thold = -1.0f; // KV cache defragmentation threshold
     int32_t rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
     ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;