summary refs log tree commit diff
path: root/convert-hf-to-gguf.py
diff options
context:
space:
mode:
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x  convert-hf-to-gguf.py  39
1 file changed, 27 insertions(+), 12 deletions(-)
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index a1c79fd4..b133f3b4 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -23,6 +23,15 @@ if 'NO_LOCAL_GGUF' not in os.environ:
import gguf
+# check for any of the given keys in the dictionary and return the value of the first key found
+def get_key_opts(d, keys):
+ for k in keys:
+ if k in d:
+ return d[k]
+ print(f"Could not find any of {keys}")
+ sys.exit()
+
+
###### MODEL DEFINITIONS ######
class SentencePieceTokenTypes(IntEnum):
@@ -257,10 +266,11 @@ class Model:
toktypes.append(gguf.TokenType.USER_DEFINED)
elif reverse_vocab[i] in added_vocab:
tokens.append(reverse_vocab[i])
- if tokenizer.added_tokens_decoder[i].special:
- toktypes.append(gguf.TokenType.CONTROL)
- else:
- toktypes.append(gguf.TokenType.USER_DEFINED)
+ if hasattr(tokenizer, "added_tokens_decoder"):
+ if tokenizer.added_tokens_decoder[i].special:
+ toktypes.append(gguf.TokenType.CONTROL)
+ else:
+ toktypes.append(gguf.TokenType.USER_DEFINED)
else:
tokens.append(reverse_vocab[i])
toktypes.append(gguf.TokenType.NORMAL)
@@ -1068,17 +1078,22 @@ class GPT2Model(Model):
class Phi2Model(Model):
def set_gguf_parameters(self):
- block_count = self.hparams["n_layer"]
+ block_count = get_key_opts(self.hparams, ["num_hidden_layers", "n_layer"])
+
+ rot_pct = get_key_opts(self.hparams, ["partial_rotary_factor"])
+ n_embd = get_key_opts(self.hparams, ["hidden_size", "n_embd"])
+ n_head = get_key_opts(self.hparams, ["num_attention_heads", "n_head"])
self.gguf_writer.add_name("Phi2")
- self.gguf_writer.add_context_length(self.hparams["n_positions"])
- self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
- self.gguf_writer.add_feed_forward_length(4 * self.hparams["n_embd"])
+ self.gguf_writer.add_context_length(get_key_opts(self.hparams, ["n_positions", "max_position_embeddings"]))
+
+ self.gguf_writer.add_embedding_length(n_embd)
+ self.gguf_writer.add_feed_forward_length(4 * n_embd)
self.gguf_writer.add_block_count(block_count)
- self.gguf_writer.add_head_count(self.hparams["n_head"])
- self.gguf_writer.add_head_count_kv(self.hparams["n_head"])
- self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
- self.gguf_writer.add_rope_dimension_count(self.hparams["rotary_dim"])
+ self.gguf_writer.add_head_count(n_head)
+ self.gguf_writer.add_head_count_kv(n_head)
+ self.gguf_writer.add_layer_norm_eps(get_key_opts(self.hparams, ["layer_norm_epsilon", "layer_norm_eps"]))
+ self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
self.gguf_writer.add_file_type(self.ftype)
self.gguf_writer.add_add_bos_token(False)