-rwxr-xr-x  convert_hf_to_gguf.py           | 23
-rwxr-xr-x  convert_lora_to_gguf.py         |  4
-rw-r--r--  gguf-py/gguf/constants.py       |  2
-rw-r--r--  gguf-py/gguf/tensor_mapping.py  |  3
4 files changed, 27 insertions, 5 deletions
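
The core of the change: Model gains a generate_extra_tensors() hook, and prepare_tensors() chains it in front of get_tensors(), so generated tensors such as rope_freqs go through the same per-tensor pipeline as checkpoint weights instead of being written directly with gguf_writer.add_tensor(). A minimal, self-contained sketch of the pattern (the toy classes and tensor shapes below are illustrative, not the converter's real ones):

    from itertools import chain
    from typing import Iterable, Iterator

    import torch

    class Model:
        def get_tensors(self) -> Iterator[tuple[str, torch.Tensor]]:
            # normally read lazily from the HF checkpoint; stubbed here
            yield ("token_embd.weight", torch.zeros(4, 8))

        # some models need extra generated tensors (like rope_freqs)
        def generate_extra_tensors(self) -> Iterable[tuple[str, torch.Tensor]]:
            return ()

        def prepare_tensors(self) -> None:
            # generated tensors are processed exactly like checkpoint tensors
            for name, data in chain(self.generate_extra_tensors(), self.get_tensors()):
                print(name, tuple(data.shape))

    class ToyRopeModel(Model):
        def generate_extra_tensors(self) -> Iterable[tuple[str, torch.Tensor]]:
            yield ("rope_freqs.weight", torch.ones(16))

    ToyRopeModel().prepare_tensors()
    # rope_freqs.weight (16,)
    # token_embd.weight (4, 8)
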
diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 16f97ab0..966cfcd3 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -14,6 +14,7 @@ from enum import IntEnum
from pathlib import Path
from hashlib import sha256
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast
+from itertools import chain
import math
import numpy as np
@@ -256,10 +257,14 @@ class Model:
return False
+ # some models need extra generated tensors (like rope_freqs)
+ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+ return ()
+
def prepare_tensors(self):
max_name_len = max(len(s) for _, s in self.tensor_map.mapping.values()) + len(".weight,")
- for name, data_torch in self.get_tensors():
+ for name, data_torch in chain(self.generate_extra_tensors(), self.get_tensors()):
# we don't need these
if name.endswith((".attention.masked_bias", ".attention.bias", ".rotary_emb.inv_freq")):
continue
@@ -1559,7 +1564,7 @@ class LlamaModel(Model):
return [(self.map_tensor_name(name), data_torch)]
- def prepare_tensors(self):
+ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
if rope_scaling.get("rope_type", '').lower() == "llama3":
base = self.hparams.get("rope_theta", 10000.0)
@@ -1586,8 +1591,9 @@ class LlamaModel(Model):
smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
rope_factors.append(1 / ((1 - smooth) / factor + smooth))
- self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))
+ yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), torch.tensor(rope_factors, dtype=torch.float32))
+ def prepare_tensors(self):
super().prepare_tensors()
if self._experts is not None:
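
Only the tail of the moved LlamaModel code is visible in the hunk above; it is the llama3 rope_scaling factor computation, now yielded as a ROPE_FREQS tensor instead of written with add_tensor(). A hedged reconstruction of the full loop, with the hparam values filled in as illustrative defaults (the real converter reads them from hparams and rope_scaling):

    import math
    import torch

    # assumed values; the converter reads these from hparams / rope_scaling
    base = 500000.0              # rope_theta
    dim = 128                    # rotary dimension per head
    factor = 8.0                 # rope_scaling["factor"]
    low_freq_factor = 1.0        # rope_scaling["low_freq_factor"]
    high_freq_factor = 4.0       # rope_scaling["high_freq_factor"]
    old_context_len = 8192       # original_max_position_embeddings

    low_freq_wavelen = old_context_len / low_freq_factor
    high_freq_wavelen = old_context_len / high_freq_factor

    rope_factors = []
    for i in range(0, dim, 2):
        freq = 1.0 / base ** (i / dim)
        wavelen = 2 * math.pi / freq
        if wavelen < high_freq_wavelen:
            rope_factors.append(1.0)     # high-frequency bands are left unscaled
        elif wavelen > low_freq_wavelen:
            rope_factors.append(factor)  # low-frequency bands get the full factor
        else:
            # the interpolation shown in the hunk above
            smooth = (old_context_len / wavelen - low_freq_factor) / (high_freq_factor - low_freq_factor)
            rope_factors.append(1 / ((1 - smooth) / factor + smooth))

    rope_freqs = torch.tensor(rope_factors, dtype=torch.float32)  # yielded as ROPE_FREQS
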
@@ -2307,6 +2313,13 @@ class Phi3MiniModel(Model):
self.gguf_writer.add_file_type(self.ftype)
self.gguf_writer.add_sliding_window(self.find_hparam(["sliding_window"]))
+ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+ n_embd = self.find_hparam(["hidden_size", "n_embd"])
+ n_head = self.find_hparam(["num_attention_heads", "n_head"])
+ max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
+ orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
+ rope_dims = n_embd // n_head
+
# write rope scaling for long context (128k) model
rope_scaling = self.find_hparam(['rope_scaling'], True)
if rope_scaling is None:
@@ -2336,8 +2349,8 @@ class Phi3MiniModel(Model):
if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
- self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
- self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+ yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_LONG), torch.tensor(long_factors, dtype=torch.float32))
+ yield (self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT), torch.tensor(short_factors, dtype=torch.float32))
@Model.register("PlamoForCausalLM")
diff --git a/convert_lora_to_gguf.py b/convert_lora_to_gguf.py
index a88d0d4a..ef088034 100755
--- a/convert_lora_to_gguf.py
+++ b/convert_lora_to_gguf.py
@@ -331,6 +331,10 @@ if __name__ == '__main__':
self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
super().set_gguf_parameters()
+ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]:
+ # Never add extra tensors (e.g. rope_freqs) for LoRA adapters
+ return ()
+
def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
tensor_map: dict[str, PartialLoraTensor] = {}
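
The LoRA converter builds its model class on top of the regular architecture classes, so without this override a LLaMA-3 adapter would inherit the rope_freqs generator and duplicate a tensor the base model GGUF already carries. The override pattern, reduced to a toy example (class names here are illustrative):

    from typing import Iterable
    import torch

    class LlamaLikeModel:
        def generate_extra_tensors(self) -> Iterable[tuple[str, torch.Tensor]]:
            yield ("rope_freqs.weight", torch.ones(64))  # belongs in the base model GGUF

    class LoraAdapterModel(LlamaLikeModel):
        def generate_extra_tensors(self) -> Iterable[tuple[str, torch.Tensor]]:
            # never add extra tensors (e.g. rope_freqs) for LoRA adapters
            return ()

    assert list(LoraAdapterModel().generate_extra_tensors()) == []
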
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index e2f4eb1a..6819979f 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -806,6 +806,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ROPE_FACTORS_LONG,
+ MODEL_TENSOR.ROPE_FACTORS_SHORT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_Q,
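
These two entries register the rope factor tensors in the architecture's tensor list (presumably MODEL_ARCH.PHI3, given the Phi3MiniModel change above); format_tensor_name() refuses any MODEL_TENSOR that is not listed for the model's arch, so the yields above depend on this registration. A quick hedged check:

    import gguf

    # format_tensor_name() raises for tensors missing from MODEL_TENSORS[arch]
    assert gguf.MODEL_TENSOR.ROPE_FACTORS_LONG in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.PHI3]
    assert gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT in gguf.MODEL_TENSORS[gguf.MODEL_ARCH.PHI3]
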
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 3ff70cd7..9688b02c 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -82,6 +82,9 @@ class TensorNameMap:
"rope.freqs", # llama-pth
"rotary_pos_emb.inv_freq", # chatglm
),
+
+ MODEL_TENSOR.ROPE_FACTORS_LONG: (),
+ MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
}
block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {
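
The empty tuples are deliberate: there is no HF checkpoint name to translate, since these tensors are generated by the converter, but listing them in the mapping config lets TensorNameMap register their GGUF names as identity mappings, so the generated tensors resolve through map_tensor_name() like any other. A hedged check of that behaviour (the block count passed to the helper is arbitrary here):

    import gguf

    tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.PHI3, 32)
    print(tmap.get_name("rope_factors_long.weight", try_suffixes=(".weight",)))   # expected: rope_factors_long
    print(tmap.get_name("rope_factors_short.weight", try_suffixes=(".weight",)))  # expected: rope_factors_short
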