From 9d533a77d0c3850ce09d736bc1baa67fd6ad27b3 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 27 Feb 2024 14:35:51 +0200 Subject: llama : fix defrag bugs + add parameter (#5735) * llama : fix defrag bugs + enable by default ggml-ci * llama : add defrag_thold parameter ggml-ci * llama : cont * llama : disable log message ggml-ci * llama : fix graph size check during defrag --- common/common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'common/common.h') diff --git a/common/common.h b/common/common.h index 3e21579b..25003df2 100644 --- a/common/common.h +++ b/common/common.h @@ -75,6 +75,7 @@ struct gpt_params { float yarn_beta_fast = 32.0f; // YaRN low correction dim float yarn_beta_slow = 1.0f; // YaRN high correction dim int32_t yarn_orig_ctx = 0; // YaRN original context length + float defrag_thold = -1.0f; // KV cache defragmentation threshold int32_t rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED; ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED; -- cgit v1.2.3