author    | bryanSwk <93190252+bryanSwk@users.noreply.github.com> | 2024-04-04 02:05:10 +0800
committer | GitHub <noreply@github.com>                           | 2024-04-03 21:05:10 +0300
commit    | bb43cf7e9d86d69ffd9c7f008f75db890a35b45a (patch)
tree      | 4abfed45ae4e8c2dd59c0a49df4c0f0cf801515e /convert-hf-to-gguf.py
parent    | 9f62c0173d964972849251c8ad12fc356f5b7896 (diff)
llama : add SEA-LION support (#6448)
* initial commit for sealion support
* add sealion support
* minor fix
* q/k ln and pos_embd only if required
* Apply suggestions from code review
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* minor : clear whitespaces
---------
Co-authored-by: bryan <bryansiow@aisingapore.org>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x | convert-hf-to-gguf.py | 15
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index bca1c2d7..c34bccaa 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -510,6 +510,16 @@ class BloomModel(Model):
 class MPTModel(Model):
     model_arch = gguf.MODEL_ARCH.MPT
 
+    def set_vocab(self):
+        try:
+            self._set_vocab_gpt2()
+        except:
+            self._set_vocab_sentencepiece()
+            self.gguf_writer.add_add_bos_token(False)
+            self.gguf_writer.add_pad_token_id(3)
+            self.gguf_writer.add_eos_token_id(1)
+            self.gguf_writer.add_unk_token_id(0)
+
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
         self.gguf_writer.add_name(self.dir_model.name)
@@ -523,7 +533,10 @@ class MPTModel(Model):
         self.gguf_writer.add_layer_norm_eps(1e-5)
         if self.hparams["attn_config"]["clip_qkv"] is not None:
             self.gguf_writer.add_clamp_kqv(self.hparams["attn_config"]["clip_qkv"])
-        self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+        if self.hparams["attn_config"]["alibi"]:
+            self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+        else:
+            self.gguf_writer.add_max_alibi_bias(0.0)
 
     def write_tensors(self):
         block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers"))
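
For context, here is a minimal, self-contained sketch of the two behaviors the hunks add, runnable outside the converter. Everything in it is a stand-in: StubWriter mimics only the gguf.GGUFWriter calls used here (with simplified key names), and the two vocab loaders merely simulate the real Model._set_vocab_gpt2 / _set_vocab_sentencepiece helpers, whose GPT-2 path fails on a SEA-LION checkpoint because it ships a SentencePiece tokenizer rather than GPT-2 style BPE files.

    # Sketch only: StubWriter and both loaders are hypothetical stand-ins.
    class StubWriter:
        def __init__(self) -> None:
            self.kv = {}

        def add_add_bos_token(self, v: bool) -> None:
            self.kv["add_bos_token"] = v

        def add_pad_token_id(self, v: int) -> None:
            self.kv["pad_token_id"] = v

        def add_eos_token_id(self, v: int) -> None:
            self.kv["eos_token_id"] = v

        def add_unk_token_id(self, v: int) -> None:
            self.kv["unk_token_id"] = v

        def add_max_alibi_bias(self, v: float) -> None:
            self.kv["max_alibi_bias"] = v


    def set_vocab_gpt2(writer: StubWriter) -> None:
        # A SEA-LION checkpoint has no BPE tokenizer files, so the real
        # loader fails here; simulate that failure.
        raise FileNotFoundError("tokenizer.json not found")


    def set_vocab_sentencepiece(writer: StubWriter) -> None:
        writer.kv["tokenizer_model"] = "llama"  # sentencepiece-backed vocab


    writer = StubWriter()

    # 1. Vocab: try the GPT-2 style BPE vocab first, fall back to
    #    sentencepiece and pin the special-token ids the patch hardcodes.
    try:
        set_vocab_gpt2(writer)
    except Exception:  # the patch itself uses a bare `except:`
        set_vocab_sentencepiece(writer)
        writer.add_add_bos_token(False)
        writer.add_pad_token_id(3)
        writer.add_eos_token_id(1)
        writer.add_unk_token_id(0)

    # 2. ALiBi: copy alibi_bias_max only when the config enables ALiBi;
    #    otherwise write 0.0, which presumably signals "no ALiBi" to the
    #    runtime. A SEA-LION-like config has ALiBi off.
    attn_config = {"alibi": False, "alibi_bias_max": 8.0}
    if attn_config["alibi"]:
        writer.add_max_alibi_bias(attn_config["alibi_bias_max"])
    else:
        writer.add_max_alibi_bias(0.0)

    print(writer.kv)

Assuming the script's command-line options are unchanged from this commit, converting a checkpoint would then look something like `python convert-hf-to-gguf.py path/to/sea-lion --outfile sea-lion-f16.gguf --outtype f16`; the exact flags are an assumption here, so check the script's --help.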