summaryrefslogtreecommitdiff
path: root/common/common.h
diff options
context:
space:
mode:
author Georgi Gerganov <ggerganov@gmail.com> 2024-02-27 14:35:51 +0200
committer GitHub <noreply@github.com> 2024-02-27 14:35:51 +0200
commit 9d533a77d0c3850ce09d736bc1baa67fd6ad27b3 (patch)
tree 25adffcbb0f7c13a8578279456a4937ed73ae3f6 /common/common.h
parent cbbd1efa06f8c09f9dff58ff9d9af509cc4c152b (diff)
llama : fix defrag bugs + add parameter (#5735)
* llama : fix defrag bugs + enable by default
  ggml-ci
* llama : add defrag_thold parameter
  ggml-ci
* llama : cont
* llama : disable log message
  ggml-ci
* llama : fix graph size check during defrag
Diffstat (limited to 'common/common.h')
-rw-r--r-- common/common.h 1
1 files changed, 1 insertions, 0 deletions
diff --git a/common/common.h b/common/common.h
index 3e21579b..25003df2 100644
--- a/common/common.h
+++ b/common/common.h
@@ -75,6 +75,7 @@ struct gpt_params {
float yarn_beta_fast = 32.0f; // YaRN low correction dim
float yarn_beta_slow = 1.0f; // YaRN high correction dim
int32_t yarn_orig_ctx = 0; // YaRN original context length
+ float defrag_thold = -1.0f; // KV cache defragmentation threshold
int32_t rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;