diff options
author | bryanSwk <93190252+bryanSwk@users.noreply.github.com> | 2024-04-04 02:05:10 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-03 21:05:10 +0300 |
commit | bb43cf7e9d86d69ffd9c7f008f75db890a35b45a (patch) | |
tree | 4abfed45ae4e8c2dd59c0a49df4c0f0cf801515e /gguf-py | |
parent | 9f62c0173d964972849251c8ad12fc356f5b7896 (diff) |
llama : add SEA-LION support (#6448)
* initial commit for sealion support
* add sealion support
* minor fix
* q/k ln and pos_embd only if required
* Apply suggestions from code review
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* minor : clear whitespaces
---------
Co-authored-by: bryan <bryansiow@aisingapore.org>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py')
-rw-r--r-- | gguf-py/gguf/constants.py | 3 | ||||
-rw-r--r-- | gguf-py/gguf/tensor_mapping.py | 2 |
2 files changed, 5 insertions, 0 deletions
```diff
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index f468802d..5214764a 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -367,6 +367,9 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.FFN_ACT,
+        MODEL_TENSOR.ATTN_Q_NORM,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.POS_EMBD,
     ],
     MODEL_ARCH.GPTJ: [
         MODEL_TENSOR.TOKEN_EMBD,
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 93a5a455..345b1b0c 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -285,11 +285,13 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
+            "transformer.blocks.{bid}.attn.q_ln",  # sea-lion
         ),
 
         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
+            "transformer.blocks.{bid}.attn.k_ln",  # sea-lion
         ),
 
         MODEL_TENSOR.ROPE_FREQS: (
```