diff options
author | Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> | 2024-03-02 01:00:46 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-01 21:30:46 +0200 |
commit | c29af7e2252d288f2ea58a7d437c1cb7c0abf160 (patch) | |
tree | b17451289ae835cb33f10c79db82a1e91004e225 /gguf-py/gguf/constants.py | |
parent | 38d16b142624bdd7c41d9955752b7f7b59c5e048 (diff) |
llama : add StarCoder2 support (#5795)
* Add support for starcoder2
* handle rope type
* skip rope freq and rotary embeddings from being serialized
* resolve comments
* Update llama.cpp
* remove redundant changes
* handle `rope-theta`
* llama : change starcoder2 rope type
* address comment
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r-- | gguf-py/gguf/constants.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 8f9139d1..5db760cb 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -112,6 +112,7 @@ class MODEL_ARCH(IntEnum): INTERNLM2 = auto() MINICPM = auto() GEMMA = auto() + STARCODER2 = auto() class MODEL_TENSOR(IntEnum): @@ -169,6 +170,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { MODEL_ARCH.INTERNLM2: "internlm2", MODEL_ARCH.MINICPM: "minicpm", MODEL_ARCH.GEMMA: "gemma", + MODEL_ARCH.STARCODER2: "starcoder2", } TENSOR_NAMES: dict[MODEL_TENSOR, str] = { @@ -526,6 +528,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.FFN_UP, MODEL_TENSOR.FFN_NORM, ], + MODEL_ARCH.STARCODER2: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.ATTN_ROT_EMBD, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], # TODO } @@ -554,6 +571,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.ROPE_FREQS, MODEL_TENSOR.ATTN_ROT_EMBD, ], + MODEL_ARCH.STARCODER2: [ + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_ROT_EMBD, + ], } # |