From 83e633c27efdf0eb0ba54249e784b0ea760b1007 Mon Sep 17 00:00:00 2001
From: postmasters
Date: Tue, 2 Jan 2024 03:51:28 -0800
Subject: llama : differentiate the KV dims in the attention (#4657)

* Add n_key_dim and n_value_dim

Some models use values that are not derived from `n_embd`. Also remove
`n_embd_head` and `n_embd_gqa` because it is not clear which "head" is
referred to (key or value).

Fix issue #4648.

* Fix `llm_build_kqv` to use `n_value_gqa`

* Rebase

* Rename variables

* Fix llm_build_kqv to be more generic wrt n_embd_head_k

* Update default values for n_embd_head_k and n_embd_head_v

Co-authored-by: Georgi Gerganov

* Fix llm_load_tensors: the asserts were not backcompat

---------

Co-authored-by: Georgi Gerganov
---
 gguf-py/gguf/constants.py | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'gguf-py/gguf/constants.py')

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index ae62cc57..f0a1c51f 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -46,6 +46,8 @@ class Keys:
         HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
         MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
         CLAMP_KQV = "{arch}.attention.clamp_kqv"
+        KEY_LENGTH = "{arch}.attention.key_length"
+        VALUE_LENGTH = "{arch}.attention.value_length"
         LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
         LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
--
cgit v1.2.3
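
Usage sketch (not part of this patch): the two new metadata keys could be written from a conversion script through gguf-py's generic GGUFWriter.add_uint32 call. The nesting of the constants under Keys.Attention, the "model.gguf" output path, and the placeholder head sizes n_embd_head_k = 128 / n_embd_head_v = 64 are assumptions made only for illustration; any dedicated GGUFWriter helper methods for these keys are outside the scope of this diff.

# Illustrative sketch only; not part of this commit.
# Writes the new attention key/value head sizes into a GGUF file using
# the generic add_uint32 API and the key constants added above.
import gguf

arch = "llama"
writer = gguf.GGUFWriter("model.gguf", arch)  # hypothetical output path

n_embd_head_k = 128  # assumed per-head K dimension for an example model
n_embd_head_v = 64   # assumed per-head V dimension (not derived from n_embd)

# Keys.Attention.KEY_LENGTH expands to "llama.attention.key_length", etc.
writer.add_uint32(gguf.Keys.Attention.KEY_LENGTH.format(arch=arch), n_embd_head_k)
writer.add_uint32(gguf.Keys.Attention.VALUE_LENGTH.format(arch=arch), n_embd_head_v)

# Finalization (writing the header, KV data, and tensors) is omitted here.

Per the commit message, models whose key and value head sizes cannot both be derived from n_embd can now declare them explicitly; when the keys are absent, readers presumably fall back to the previous n_embd-based defaults.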