Diffstat (limited to 'gguf-py')
-rw-r--r--  gguf-py/gguf/constants.py   | 17
-rw-r--r--  gguf-py/gguf/gguf_writer.py |  3
2 files changed, 14 insertions, 6 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 692120f4..42df2e4d 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -57,12 +57,13 @@ class Keys:
         CAUSAL = "{arch}.attention.causal"
 
     class Rope:
-        DIMENSION_COUNT      = "{arch}.rope.dimension_count"
-        FREQ_BASE            = "{arch}.rope.freq_base"
-        SCALING_TYPE         = "{arch}.rope.scaling.type"
-        SCALING_FACTOR       = "{arch}.rope.scaling.factor"
-        SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
-        SCALING_FINETUNED    = "{arch}.rope.scaling.finetuned"
+        DIMENSION_COUNT         = "{arch}.rope.dimension_count"
+        FREQ_BASE               = "{arch}.rope.freq_base"
+        SCALING_TYPE            = "{arch}.rope.scaling.type"
+        SCALING_FACTOR          = "{arch}.rope.scaling.factor"
+        SCALING_ATTN_FACTOR     = "{arch}.rope.scaling.attn_factor"
+        SCALING_ORIG_CTX_LEN    = "{arch}.rope.scaling.original_context_length"
+        SCALING_FINETUNED       = "{arch}.rope.scaling.finetuned"
 
     class SSM:
         CONV_KERNEL = "{arch}.ssm.conv_kernel"
@@ -148,6 +149,8 @@ class MODEL_TENSOR(IntEnum):
     OUTPUT             = auto()
     OUTPUT_NORM        = auto()
     ROPE_FREQS         = auto()
+    ROPE_FACTORS_LONG  = auto()
+    ROPE_FACTORS_SHORT = auto()
     ATTN_Q             = auto()
     ATTN_K             = auto()
     ATTN_V             = auto()
@@ -225,6 +228,8 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.OUTPUT_NORM:        "output_norm",
     MODEL_TENSOR.OUTPUT:             "output",
     MODEL_TENSOR.ROPE_FREQS:         "rope_freqs",
+    MODEL_TENSOR.ROPE_FACTORS_LONG:  "rope_factors_long",
+    MODEL_TENSOR.ROPE_FACTORS_SHORT: "rope_factors_short",
     MODEL_TENSOR.ATTN_NORM:          "blk.{bid}.attn_norm",
     MODEL_TENSOR.ATTN_NORM_2:        "blk.{bid}.attn_norm_2",
     MODEL_TENSOR.ATTN_QKV:           "blk.{bid}.attn_qkv",
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index d5e323a5..8b41b54e 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -433,6 +433,9 @@ class GGUFWriter:
     def add_rope_scaling_factor(self, value: float) -> None:
         self.add_float32(Keys.Rope.SCALING_FACTOR.format(arch=self.arch), value)
 
+    def add_rope_scaling_attn_factors(self, value: Sequence[float]) -> None:
+        self.add_float32(Keys.Rope.SCALING_ATTN_FACTOR.format(arch=self.arch), value)
+
     def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
         self.add_uint32(Keys.Rope.SCALING_ORIG_CTX_LEN.format(arch=self.arch), value)
 
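Usage note: the new key and tensor names are meant to be emitted by model conversion scripts. Below is a minimal sketch of how a converter might write them with gguf-py; the arch string "phi3", the output path, the factor lengths and values, and the ".weight" suffix on the tensor names are assumptions for illustration and are not part of this change.

# Hypothetical converter snippet (not part of this change): emits the new
# RoPE scaling metadata and factor tensors with gguf-py.
import numpy as np
import gguf

# Path and arch string are assumed for illustration.
writer = gguf.GGUFWriter("model.gguf", arch="phi3")

# The new writer method forwards its value to add_float32, so a single
# scalar is passed here despite the Sequence[float] annotation.
writer.add_rope_scaling_attn_factors(1.19)  # example value

# Per-dimension long/short factors are stored as tensors named after the new
# MODEL_TENSOR entries; lengths, values and ".weight" suffix are assumptions.
long_factors  = np.ones(48, dtype=np.float32)
short_factors = np.ones(48, dtype=np.float32)
writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG]  + ".weight", long_factors)
writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", short_factors)

# Tensors and key-value metadata must be registered before the header is
# written, since the header records their counts.
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()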