llama : add SEA-LION support (#6448)

* initial commit for sealion support

* add sealion support

* minor fix

* q/k ln and pos_embd only if required

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* minor : clear whitespaces

---------

Co-authored-by: bryan <bryansiow@aisingapore.org>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: bryanSwk <93190252+bryanSwk@users.noreply.github.com> 2024-04-04 02:05:10 +0800
committer: GitHub <noreply@github.com> 2024-04-03 21:05:10 +0300
commit: bb43cf7e9d86d69ffd9c7f008f75db890a35b45a (patch)
tree: 4abfed45ae4e8c2dd59c0a49df4c0f0cf801515e /gguf-py
parent: 9f62c0173d964972849251c8ad12fc356f5b7896 (diff)
2 files changed, 5 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index f468802d..5214764a 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -367,6 +367,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.FFN_ACT,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.POS_EMBD,
     ],
     MODEL_ARCH.GPTJ: [
         MODEL_TENSOR.TOKEN_EMBD,
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 93a5a455..345b1b0c 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -285,11 +285,13 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",                       # persimmon
+            "transformer.blocks.{bid}.attn.q_ln",                             # sea-lion
         ),
 
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",                       # persimmon
+            "transformer.blocks.{bid}.attn.k_ln",                             # sea-lion
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
author	bryanSwk <93190252+bryanSwk@users.noreply.github.com>	2024-04-04 02:05:10 +0800
committer	GitHub <noreply@github.com>	2024-04-03 21:05:10 +0300
commit	bb43cf7e9d86d69ffd9c7f008f75db890a35b45a (patch)
tree	4abfed45ae4e8c2dd59c0a49df4c0f0cf801515e /gguf-py
parent	9f62c0173d964972849251c8ad12fc356f5b7896 (diff)