author    | bryanSwk <93190252+bryanSwk@users.noreply.github.com> | 2024-04-04 02:05:10 +0800
committer | GitHub <noreply@github.com>                           | 2024-04-03 21:05:10 +0300
commit    | bb43cf7e9d86d69ffd9c7f008f75db890a35b45a (patch)
tree      | 4abfed45ae4e8c2dd59c0a49df4c0f0cf801515e /convert-hf-to-gguf.py
parent    | 9f62c0173d964972849251c8ad12fc356f5b7896 (diff)
llama : add SEA-LION support (#6448)
* initial commit for sealion support
* add sealion support
* minor fix
* q/k ln and pos_embd only if required
* Apply suggestions from code review
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* minor : clear whitespaces
---------
Co-authored-by: bryan <bryansiow@aisingapore.org>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x | convert-hf-to-gguf.py | 15
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index bca1c2d7..c34bccaa 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -510,6 +510,16 @@ class BloomModel(Model):
 class MPTModel(Model):
     model_arch = gguf.MODEL_ARCH.MPT
 
+    def set_vocab(self):
+        try:
+            self._set_vocab_gpt2()
+        except:
+            self._set_vocab_sentencepiece()
+            self.gguf_writer.add_add_bos_token(False)
+            self.gguf_writer.add_pad_token_id(3)
+            self.gguf_writer.add_eos_token_id(1)
+            self.gguf_writer.add_unk_token_id(0)
+
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
         self.gguf_writer.add_name(self.dir_model.name)
@@ -523,7 +533,10 @@ class MPTModel(Model):
         self.gguf_writer.add_layer_norm_eps(1e-5)
         if self.hparams["attn_config"]["clip_qkv"] is not None:
             self.gguf_writer.add_clamp_kqv(self.hparams["attn_config"]["clip_qkv"])
-        self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+        if self.hparams["attn_config"]["alibi"]:
+            self.gguf_writer.add_max_alibi_bias(self.hparams["attn_config"]["alibi_bias_max"])
+        else:
+            self.gguf_writer.add_max_alibi_bias(0.0)
 
     def write_tensors(self):
         block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers"))
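
For context, here is a minimal, self-contained sketch of the two behaviors the hunks add, runnable outside the converter. Everything in it is a stand-in: StubWriter mimics only the gguf.GGUFWriter calls used here (with simplified key names), and the two vocab loaders merely simulate the real Model._set_vocab_gpt2 / _set_vocab_sentencepiece helpers, whose GPT-2 path fails on a SEA-LION checkpoint because it ships a SentencePiece tokenizer rather than GPT-2 style BPE files.

    # Sketch only: StubWriter and both loaders are hypothetical stand-ins.
    class StubWriter:
        def __init__(self) -> None:
            self.kv = {}

        def add_add_bos_token(self, v: bool) -> None:
            self.kv["add_bos_token"] = v

        def add_pad_token_id(self, v: int) -> None:
            self.kv["pad_token_id"] = v

        def add_eos_token_id(self, v: int) -> None:
            self.kv["eos_token_id"] = v

        def add_unk_token_id(self, v: int) -> None:
            self.kv["unk_token_id"] = v

        def add_max_alibi_bias(self, v: float) -> None:
            self.kv["max_alibi_bias"] = v


    def set_vocab_gpt2(writer: StubWriter) -> None:
        # A SEA-LION checkpoint has no BPE tokenizer files, so the real
        # loader fails here; simulate that failure.
        raise FileNotFoundError("tokenizer.json not found")


    def set_vocab_sentencepiece(writer: StubWriter) -> None:
        writer.kv["tokenizer_model"] = "llama"  # sentencepiece-backed vocab


    writer = StubWriter()

    # 1. Vocab: try the GPT-2 style BPE vocab first, fall back to
    #    sentencepiece and pin the special-token ids the patch hardcodes.
    try:
        set_vocab_gpt2(writer)
    except Exception:  # the patch itself uses a bare `except:`
        set_vocab_sentencepiece(writer)
        writer.add_add_bos_token(False)
        writer.add_pad_token_id(3)
        writer.add_eos_token_id(1)
        writer.add_unk_token_id(0)

    # 2. ALiBi: copy alibi_bias_max only when the config enables ALiBi;
    #    otherwise write 0.0, which presumably signals "no ALiBi" to the
    #    runtime. A SEA-LION-like config has ALiBi off.
    attn_config = {"alibi": False, "alibi_bias_max": 8.0}
    if attn_config["alibi"]:
        writer.add_max_alibi_bias(attn_config["alibi_bias_max"])
    else:
        writer.add_max_alibi_bias(0.0)

    print(writer.kv)

Assuming the script's command-line options are unchanged from this commit, converting a checkpoint would then look something like `python convert-hf-to-gguf.py path/to/sea-lion --outfile sea-lion-f16.gguf --outtype f16`; the exact flags are an assumption here, so check the script's --help.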