author     Shintarou Okada <kokuzen@gmail.com>   2023-12-24 22:35:49 +0900
committer  GitHub <noreply@github.com>           2023-12-24 15:35:49 +0200
commit     753be377b69bda2d65a7e089f2b7f0c53ef3495e (patch)
tree       b32ae0b6fb10db974322edeeb22021bc43d1e210 /gguf-py/gguf/constants.py
parent     5bf3953d7e9831ea22b0bc017ce97409b801ccf1 (diff)
llama : add PLaMo model (#3557)
* add plamo mock
* add tensor loading
* plamo convert
* update norm
* able to compile
* fix norm_rms_eps hparam
* runnable
* use inp_pos
* seems ok
* update kqv code
* remove develop code
* update README
* shuffle attn_q.weight and attn_output.weight for broadcasting
* remove plamo_llm_build_kqv and use llm_build_kqv
* fix style
* update
* llama : remove obsolete KQ_scale
* plamo : fix tensor names for correct GPU offload

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r--  gguf-py/gguf/constants.py  17
1 file changed, 17 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 390dca04..4cd87cdd 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -96,6 +96,7 @@ class MODEL_ARCH(IntEnum):
    STABLELM = auto()
    QWEN = auto()
    PHI2 = auto()
+    PLAMO = auto()
class MODEL_TENSOR(IntEnum):
@@ -142,6 +143,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.STABLELM: "stablelm",
    MODEL_ARCH.QWEN: "qwen",
    MODEL_ARCH.PHI2: "phi2",
+    MODEL_ARCH.PLAMO: "plamo",
}
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
@@ -349,6 +351,21 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.FFN_DOWN,
        MODEL_TENSOR.FFN_UP,
    ],
+    MODEL_ARCH.PLAMO: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.OUTPUT,
+        MODEL_TENSOR.ROPE_FREQS,
+        MODEL_TENSOR.ATTN_NORM,
+        MODEL_TENSOR.ATTN_Q,
+        MODEL_TENSOR.ATTN_K,
+        MODEL_TENSOR.ATTN_V,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.ATTN_ROT_EMBD,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+    ],
    MODEL_ARCH.GPT2: [
        # TODO
    ],
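
For reference, a minimal Python sketch of how these new entries are consumed downstream (an illustration, not part of this commit; it assumes gguf-py is importable and relies on the "{bid}" block-index placeholder used by the per-block names in TENSOR_NAMES):

    # List the canonical GGUF tensor names registered for PLaMo (block 0).
    # Non-block names such as "token_embd" contain no "{bid}" placeholder,
    # so the unused keyword argument is simply ignored by str.format().
    from gguf.constants import MODEL_ARCH, MODEL_TENSORS, TENSOR_NAMES

    for tensor in MODEL_TENSORS[MODEL_ARCH.PLAMO]:
        print(TENSOR_NAMES[tensor].format(bid=0))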