summary | refs | log | tree | commit | diff
path: root/gguf-py
diff options
context:
space:
mode:
author: chiranko <96988916+chiranko@users.noreply.github.com> 2024-01-19 17:07:27 +0800
committer: GitHub <noreply@github.com> 2024-01-19 11:07:27 +0200
commit: 2b3b999cacc7ad1207c32fbdf3479a19c06e1a34 (patch)
tree: be2adbdfb73eb5d24ab9bcbb6f53671b9bb88ffa /gguf-py
parent: 993fba81807e55d27b570945af8e416d535eced1 (diff)
llama : add CodeShell support (#5016)
* llama: add codeshell support

* llama.cpp: fix codeshell with NeoX rope

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py')
-rw-r--r-- gguf-py/gguf/constants.py | 19
-rw-r--r-- gguf-py/gguf/tensor_mapping.py | 1
2 files changed, 20 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 972b4e9a..95c58b41 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -99,6 +99,7 @@ class MODEL_ARCH(IntEnum):
QWEN = auto()
PHI2 = auto()
PLAMO = auto()
+ CODESHELL = auto()
class MODEL_TENSOR(IntEnum):
@@ -147,6 +148,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.QWEN: "qwen",
MODEL_ARCH.PHI2: "phi2",
MODEL_ARCH.PLAMO: "plamo",
+ MODEL_ARCH.CODESHELL: "codeshell",
}
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -396,6 +398,19 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_DOWN,
MODEL_TENSOR.FFN_UP,
+ ],
+ MODEL_ARCH.CODESHELL: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.POS_EMBD,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.OUTPUT,
+ MODEL_TENSOR.ATTN_NORM,
+ MODEL_TENSOR.ATTN_QKV,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.ATTN_ROT_EMBD,
+ MODEL_TENSOR.FFN_NORM,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_UP,
]
# TODO
}
@@ -417,6 +432,10 @@ MODEL_TENSOR_SKIP: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.ROPE_FREQS,
MODEL_TENSOR.ATTN_ROT_EMBD,
],
+ MODEL_ARCH.CODESHELL: [
+ MODEL_TENSOR.ROPE_FREQS,
+ MODEL_TENSOR.ATTN_ROT_EMBD,
+ ],
}
#
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index e5b14610..de177af1 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -154,6 +154,7 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
"layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
"model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
+ "transformer.h.{bid}.attn.rotary_emb.inv_freq", # codeshell
),
# Feed-forward norm