diff options
author | Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> | 2024-03-02 01:00:46 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-01 21:30:46 +0200 |
commit | c29af7e2252d288f2ea58a7d437c1cb7c0abf160 (patch) | |
tree | b17451289ae835cb33f10c79db82a1e91004e225 /gguf-py/gguf/tensor_mapping.py | |
parent | 38d16b142624bdd7c41d9955752b7f7b59c5e048 (diff) |
llama : add StarCoder2 support (#5795)
* Add support for starcoder2
* handle rope type
* skip rope freq and rotary embeddings from being serialized
* resolve comments
* Update llama.cpp
* remove redundant changes
* handle `rope-theta`
* llama : change starcoder2 rope type
* address comment
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py/gguf/tensor_mapping.py')
-rw-r--r-- | gguf-py/gguf/tensor_mapping.py | 2 |
1 files changed, 2 insertions, 0 deletions
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 86100377..db2ec970 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -210,6 +210,7 @@ class TensorNameMap: "model.layers.layers.{bid}.mlp.up_proj", # plamo "model.layers.{bid}.feed_forward.w3", # internlm2 "encoder.layers.{bid}.mlp.fc11", # nomic-bert + "model.layers.{bid}.mlp.c_fc", # starcoder2 ), MODEL_TENSOR.FFN_UP_EXP: ( @@ -256,6 +257,7 @@ class TensorNameMap: "model.layers.layers.{bid}.mlp.down_proj", # plamo "model.layers.{bid}.feed_forward.w2", # internlm2 "encoder.layers.{bid}.mlp.fc2", # nomic-bert + "model.layers.{bid}.mlp.c_proj", # starcoder2 ), MODEL_TENSOR.FFN_DOWN_EXP: ( |