summary | refs | log | tree | commit | diff
path: root/llama.h
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2024-02-27 14:35:51 +0200
committer: GitHub <noreply@github.com> 2024-02-27 14:35:51 +0200
commit: 9d533a77d0c3850ce09d736bc1baa67fd6ad27b3 (patch)
tree: 25adffcbb0f7c13a8578279456a4937ed73ae3f6 /llama.h
parent: cbbd1efa06f8c09f9dff58ff9d9af509cc4c152b (diff)
llama : fix defrag bugs + add parameter (#5735)
* llama : fix defrag bugs + enable by default
  ggml-ci
* llama : add defrag_thold parameter
  ggml-ci
* llama : cont
* llama : disable log message
  ggml-ci
* llama : fix graph size check during defrag
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h 1
1 file changed, 1 insertion, 0 deletions
diff --git a/llama.h b/llama.h
index 3ff77d5a..60416180 100644
--- a/llama.h
+++ b/llama.h
@@ -245,6 +245,7 @@ extern "C" {
float yarn_beta_fast; // YaRN low correction dim
float yarn_beta_slow; // YaRN high correction dim
uint32_t yarn_orig_ctx; // YaRN original context size
+ float defrag_thold; // defragment the KV cache if holes/size > thold, < 0 disabled (default)
ggml_backend_sched_eval_callback cb_eval;
void * cb_eval_user_data;