Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x  convert-hf-to-gguf.py  49
1 files changed, 43 insertions, 6 deletions
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 6357d403..daad1c4f 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -14,6 +14,7 @@ from pathlib import Path
 from hashlib import sha256
 from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Sequence, TypeVar, cast
 
+import math
 import numpy as np
 import torch
@@ -1784,23 +1785,59 @@ class Phi3MiniModel(Model):
     def set_gguf_parameters(self):
         block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
 
-        rot_pct = 1.0
         n_embd = self.find_hparam(["hidden_size", "n_embd"])
         n_head = self.find_hparam(["num_attention_heads", "n_head"])
+        n_head_kv = self.find_hparam(["num_key_value_heads", "n_head_kv"])
         rms_eps = self.find_hparam(["rms_norm_eps"])
+        max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
+        orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
+        rope_dims = n_embd // n_head
 
         self.gguf_writer.add_name("Phi3")
-        self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
-
+        self.gguf_writer.add_context_length(max_pos_embds)
+        self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
         self.gguf_writer.add_embedding_length(n_embd)
-        self.gguf_writer.add_feed_forward_length(8192)
+        self.gguf_writer.add_feed_forward_length(self.find_hparam(["intermediate_size"]))
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_head_count(n_head)
-        self.gguf_writer.add_head_count_kv(n_head)
+        self.gguf_writer.add_head_count_kv(n_head_kv)
         self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
-        self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
+        self.gguf_writer.add_rope_dimension_count(rope_dims)
+        self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
         self.gguf_writer.add_file_type(self.ftype)
 
+        # write rope scaling for long context (128k) model
+        rope_scaling = self.find_hparam(['rope_scaling'], True)
+        if (rope_scaling is None):
+            return
+
+        scale = max_pos_embds / orig_max_pos_embds
+
+        rope_scaling_type = rope_scaling.get('type', '').lower()
+        if len(rope_scaling_type) == 0:
+            raise KeyError('Missing the required key rope_scaling.type')
+
+        if rope_scaling_type == 'su':
+            attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
+        elif rope_scaling_type == 'yarn':
+            attn_factor = 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0
+        else:
+            raise NotImplementedError(f'The rope scaling type {rope_scaling_type} is not supported yet')
+
+        self.gguf_writer.add_rope_scaling_attn_factors(attn_factor)
+
+        long_factors = rope_scaling.get('long_factor', None)
+        short_factors = rope_scaling.get('short_factor', None)
+
+        if long_factors is None or short_factors is None:
+            raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling.short_factor')
+
+        if len(long_factors) != len(short_factors) or len(long_factors) != rope_dims / 2:
+            raise ValueError(f'The length of rope long and short factors must be {rope_dims / 2}')
+
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_LONG] + ".weight", np.array(long_factors, dtype=np.float32))
+        self.gguf_writer.add_tensor(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.ROPE_FACTORS_SHORT] + ".weight", np.array(short_factors, dtype=np.float32))
+
 
 @Model.register("PlamoForCausalLM")
 class PlamoModel(Model):
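
For reference, below is a small standalone sketch of the arithmetic the new rope-scaling code performs. The hyperparameter values are only illustrative (they approximate a long-context Phi-3-mini config.json and are not taken from this diff); the expressions mirror the 'su' branch of the patch.

import math

# Illustrative hyperparameters resembling a long-context Phi-3-mini config.json
# (assumed values, for demonstration only -- not read from this commit)
hidden_size = 3072
num_attention_heads = 32
max_position_embeddings = 131072          # extended context length
original_max_position_embeddings = 4096   # pre-extension context length

rope_dims = hidden_size // num_attention_heads                        # per-head dimension (96)
scale = max_position_embeddings / original_max_position_embeddings    # 32.0

# 'su'-style attention scaling factor, same expression as in the patch above
attn_factor = math.sqrt(1 + math.log(scale) / math.log(original_max_position_embeddings)) if scale > 1.0 else 1.0
print(f"attn_factor ~= {attn_factor:.4f}")   # roughly 1.19

# long_factor / short_factor must each supply one value per RoPE frequency,
# i.e. rope_dims / 2 entries (48 here), or the converter raises ValueError
print(f"expected factor list length: {rope_dims // 2}")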