| author | Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> | 2024-04-09 09:16:13 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-04-09 11:16:13 +0300 |
| commit | 5dc9dd7152dedc6046b646855585bd070c91e8c8 (patch) | |
| tree | d2bae3652d91cdd9327e28fa85d167a67e050c53 /gguf-py | |
| parent | e11a8999b5690f810c2c99c14347f0834e68c524 (diff) | |
llama : add Command R Plus support (#6491)
* Add Command R Plus GGUF
* Add Command R Plus GGUF
* Loading works up to LayerNorm2D
* Export new tensors in 1D so they are not quantized (see the export sketch after the commit message).
* Fix embedding layer based on Noeda's example
* Whitespace
* Add line
* Fix unexpected tokens on MPS. Re-add F16 fix. (Noeda)
* dranger003: Fix block index overflow in CUDA dequantizing.
* Reverted blocked multiplication code as it still has issues and could affect other Llama arches
* export norms as f32
* fix overflow issues during quant and other cleanup (the overflow illustration below shows why)
* Type convention
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* dranger003: Fix more int overflow during quant.
---------
Co-authored-by: S <seast@Ss-Mac-Studio.local>
Co-authored-by: S <s@example.com>
Co-authored-by: slaren <slarengh@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
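
The "export in 1D" and "norms as f32" items above keep the new attention-norm weights out of the quantizer: llama.cpp only quantizes 2-D matrices, so flattened float32 norm tensors pass through untouched. Below is a minimal sketch of that idea using the gguf-py writer; the output path, architecture string, and tensor shape are illustrative assumptions, not code from this PR.

```python
import numpy as np
import gguf

# Hypothetical output file; "command-r" is the GGUF architecture string.
writer = gguf.GGUFWriter("command-r-plus.gguf", arch="command-r")

def add_norm_tensor(name: str, data: np.ndarray) -> None:
    # Flatten to 1-D and force float32: the quantizer skips non-2-D tensors,
    # so the norm weights keep full precision in the quantized model.
    writer.add_tensor(name,
                      data.astype(np.float32).reshape(-1),
                      raw_dtype=gguf.GGMLQuantizationType.F32)

# Assumed per-head Q-norm weight of block 0 (the shape is illustrative only).
add_norm_tensor("blk.0.attn_q_norm.weight", np.ones((96, 128), dtype=np.float16))
```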
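The "overflow during quant" and CUDA-dequantize fixes address 32-bit index arithmetic: at Command R Plus scale a single tensor can exceed 2^31 elements. A quick back-of-the-envelope check (the 256000 x 12288 embedding shape is an assumed Command R Plus-sized value, not taken from this diff):

```python
# Assumed embedding shape for a Command R Plus-scale model: 256000 vocab x 12288 hidden.
n_elements = 256000 * 12288
print(n_elements)              # 3145728000 elements
print(n_elements > 2**31 - 1)  # True: too large for a signed 32-bit index

# What a 32-bit signed counter would actually hold (two's-complement wrap-around),
# i.e. the kind of negative block index the CUDA/quant fixes guard against.
wrapped = (n_elements + 2**31) % 2**32 - 2**31
print(wrapped)                 # -1149239296
```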
Diffstat (limited to 'gguf-py')
| -rw-r--r-- | gguf-py/gguf/constants.py | 2 |
| -rw-r--r-- | gguf-py/gguf/tensor_mapping.py | 2 |
2 files changed, 4 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index c44d8abe..a6454a10 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -639,6 +639,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_GATE,
         MODEL_TENSOR.FFN_DOWN,
         MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.ATTN_K_NORM,
+        MODEL_TENSOR.ATTN_Q_NORM,
     ],
     # TODO
 }
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 345b1b0c..4f02d298 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -285,12 +285,14 @@ class TensorNameMap:
         MODEL_TENSOR.ATTN_Q_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.q_layernorm",
             "model.layers.{bid}.self_attn.q_layernorm",  # persimmon
+            "model.layers.{bid}.self_attn.q_norm",       # cohere
             "transformer.blocks.{bid}.attn.q_ln",        # sea-lion
         ),

         MODEL_TENSOR.ATTN_K_NORM: (
             "language_model.encoder.layers.{bid}.self_attention.k_layernorm",
             "model.layers.{bid}.self_attn.k_layernorm",  # persimmon
+            "model.layers.{bid}.self_attn.k_norm",       # cohere
             "transformer.blocks.{bid}.attn.k_ln",        # sea-lion
         ),
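
The tensor_mapping.py change is what lets the converter recognise the Cohere checkpoint layout: with the two "cohere" entries in place, the HF-style q_norm/k_norm names resolve to the standard GGUF tensor names. A small usage sketch, assuming the gguf-py API at this commit (the block count of 64 is an assumed Command R Plus value):

```python
import gguf

# Build the name map for the Command R architecture (64 is an assumed layer count).
tmap = gguf.get_tensor_name_map(gguf.MODEL_ARCH.COMMAND_R, 64)

# With the new "cohere" entries, the HF-style names resolve to GGUF names:
print(tmap.get_name("model.layers.0.self_attn.q_norm.weight",
                    try_suffixes=(".weight", ".bias")))   # blk.0.attn_q_norm.weight
print(tmap.get_name("model.layers.0.self_attn.k_norm.weight",
                    try_suffixes=(".weight", ".bias")))   # blk.0.attn_k_norm.weight
```

Note that both files have to change together: the constants.py hunk lists ATTN_Q_NORM/ATTN_K_NORM for the architecture so the mapping is built at all, and the tensor_mapping.py hunk supplies the source names to match.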