summary | refs | log | tree | commit | diff
path: root/gguf-py
diff options
context:
space:
mode:
Diffstat (limited to 'gguf-py')
-rw-r--r-- gguf-py/gguf/constants.py | 2
-rw-r--r-- gguf-py/gguf/gguf_writer.py | 6
2 files changed, 8 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index ae62cc57..f0a1c51f 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -46,6 +46,8 @@ class Keys:
HEAD_COUNT_KV = "{arch}.attention.head_count_kv"
MAX_ALIBI_BIAS = "{arch}.attention.max_alibi_bias"
CLAMP_KQV = "{arch}.attention.clamp_kqv"
+ KEY_LENGTH = "{arch}.attention.key_length"
+ VALUE_LENGTH = "{arch}.attention.value_length"
LAYERNORM_EPS = "{arch}.attention.layer_norm_epsilon"
LAYERNORM_RMS_EPS = "{arch}.attention.layer_norm_rms_epsilon"
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 73e02160..d93aaa87 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -333,6 +333,12 @@ class GGUFWriter:
def add_head_count_kv(self, count: int) -> None:
self.add_uint32(Keys.Attention.HEAD_COUNT_KV.format(arch=self.arch), count)
+ def add_key_length(self, length: int) -> None:
+ self.add_uint32(Keys.Attention.KEY_LENGTH.format(arch=self.arch), length)
+
+ def add_value_length(self, length: int) -> None:
+ self.add_uint32(Keys.Attention.VALUE_LENGTH.format(arch=self.arch), length)
+
def add_max_alibi_bias(self, bias: float) -> None:
self.add_float32(Keys.Attention.MAX_ALIBI_BIAS.format(arch=self.arch), bias)