author     liuwei-git <14815172+liuwei-git@users.noreply.github.com>  2024-04-24 15:00:37 +0800
committer  GitHub <noreply@github.com>                                2024-04-24 10:00:37 +0300
commit     c8297c6af5693555652c40b95974b95d49d2674d (patch)
tree       403d5e80233b90b3a03a7283c297c679400ccf6a /convert-hf-to-gguf.py
parent     4e96a812b3ce7322a29a3008db2ed73d9087b176 (diff)
llama : add phi3 support (#6852)
* add explicit phi3 support
* add explicit phi3 support
* remove unused code
* convert : add BOS token
* llama : match EOT token <|end|>
* llama : minor / style
* llama : tabs -> spaces
* convert : fix lint checks
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
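The registration decorator in this patch is what wires Phi-3 into the converter: convert-hf-to-gguf.py reads the "architectures" entry of the checkpoint's config.json and dispatches to the class registered under that name, so "Phi3ForCausalLM" maps to the new Phi3MiniModel. A minimal sketch of that registry pattern, with simplified names rather than the script's actual Model base class:

    # Minimal sketch of the Model.register dispatch pattern; names are
    # simplified and this is not the script's actual implementation.
    _model_classes: dict[str, type] = {}

    def register(*names: str):
        def wrapper(cls: type) -> type:
            for name in names:
                _model_classes[name] = cls  # e.g. "Phi3ForCausalLM" -> Phi3MiniModel
            return cls
        return wrapper

    def from_model_architecture(arch: str) -> type:
        # arch comes from the checkpoint: hparams["architectures"][0]
        try:
            return _model_classes[arch]
        except KeyError:
            raise NotImplementedError(f"Architecture {arch!r} not supported!") from None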
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x  convert-hf-to-gguf.py  85
1 file changed, 85 insertions, 0 deletions
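Most of the insertions implement set_vocab: every slot up to config.json's vocab_size is pre-filled with a [PADn] placeholder, the real pieces are copied out of tokenizer.model, and added_tokens.json may then override the tail. A sketch of that override step, assuming Phi-3-mini's published tokenizer shape (32000 SentencePiece pieces, vocab_size 32064); the token names and IDs below are illustrative, not verified values:

    # Illustrative walk-through of the added_tokens.json override in set_vocab.
    # Shapes assume Phi-3-mini (32000 base pieces, vocab_size 32064); the
    # example token IDs are hypothetical.
    vocab_size = 32064                                         # from config.json
    added_tokens_json = {"<|end|>": 32007, "<|user|>": 32010}  # excerpt, illustrative

    tokens = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
    # Slots 0..31999 are then overwritten from tokenizer.model; the last 64
    # slots keep their placeholders unless added_tokens.json names them.
    for key, token_id in added_tokens_json.items():
        if token_id >= vocab_size:  # same out-of-range guard as the diff
            continue
        tokens[token_id] = key.encode("utf-8")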
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 4fd916cb..4ace13eb 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -1979,6 +1979,91 @@ class Phi2Model(Model):
self.gguf_writer.add_add_bos_token(False)
+@Model.register("Phi3ForCausalLM")
+class Phi3MiniModel(Model):
+ model_arch = gguf.MODEL_ARCH.PHI3
+
+ def set_vocab(self):
+ from sentencepiece import SentencePieceProcessor
+
+ tokenizer_path = self.dir_model / 'tokenizer.model'
+
+ if not tokenizer_path.is_file():
+ print(f'Error: Missing {tokenizer_path}', file=sys.stderr)
+ sys.exit(1)
+
+ tokenizer = SentencePieceProcessor(str(tokenizer_path))
+
+ vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
+
+ tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
+ scores: list[float] = [-10000.0] * vocab_size
+ toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
+
+ for token_id in range(tokenizer.vocab_size()):
+
+ piece = tokenizer.id_to_piece(token_id)
+ text = piece.encode("utf-8")
+ score = tokenizer.get_score(token_id)
+
+ toktype = SentencePieceTokenTypes.NORMAL
+ if tokenizer.is_unknown(token_id):
+ toktype = SentencePieceTokenTypes.UNKNOWN
+ elif tokenizer.is_control(token_id):
+ toktype = SentencePieceTokenTypes.CONTROL
+ elif tokenizer.is_unused(token_id):
+ toktype = SentencePieceTokenTypes.UNUSED
+ elif tokenizer.is_byte(token_id):
+ toktype = SentencePieceTokenTypes.BYTE
+
+ tokens[token_id] = text
+ scores[token_id] = score
+ toktypes[token_id] = toktype
+
+ added_tokens_file = self.dir_model / 'added_tokens.json'
+ if added_tokens_file.is_file():
+ with open(added_tokens_file, "r", encoding="utf-8") as f:
+ added_tokens_json = json.load(f)
+
+ for key in added_tokens_json:
+ token_id = added_tokens_json[key]
+ if (token_id >= vocab_size):
+ print(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
+ continue
+
+ tokens[token_id] = key.encode("utf-8")
+ scores[token_id] = -1000.0
+ toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
+
+ self.gguf_writer.add_tokenizer_model("llama")
+ self.gguf_writer.add_token_list(tokens)
+ self.gguf_writer.add_token_scores(scores)
+ self.gguf_writer.add_token_types(toktypes)
+
+ special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+ special_vocab.add_to_gguf(self.gguf_writer)
+
+ def set_gguf_parameters(self):
+ block_count = self.find_hparam(["num_hidden_layers", "n_layer"])
+
+ rot_pct = 1.0
+ n_embd = self.find_hparam(["hidden_size", "n_embd"])
+ n_head = self.find_hparam(["num_attention_heads", "n_head"])
+ rms_eps = self.find_hparam(["rms_norm_eps"])
+
+ self.gguf_writer.add_name("Phi3")
+ self.gguf_writer.add_context_length(self.find_hparam(["n_positions", "max_position_embeddings"]))
+
+ self.gguf_writer.add_embedding_length(n_embd)
+ self.gguf_writer.add_feed_forward_length(8192)
+ self.gguf_writer.add_block_count(block_count)
+ self.gguf_writer.add_head_count(n_head)
+ self.gguf_writer.add_head_count_kv(n_head)
+ self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
+ self.gguf_writer.add_rope_dimension_count(int(rot_pct * n_embd) // n_head)
+ self.gguf_writer.add_file_type(self.ftype)
+
+
@Model.register("PlamoForCausalLM")
class PlamoModel(Model):
model_arch = gguf.MODEL_ARCH.PLAMO