author    Shintarou Okada <kokuzen@gmail.com>      2023-12-24 22:35:49 +0900
committer GitHub <noreply@github.com>              2023-12-24 15:35:49 +0200
commit    753be377b69bda2d65a7e089f2b7f0c53ef3495e (patch)
tree      b32ae0b6fb10db974322edeeb22021bc43d1e210 /gguf-py/gguf/tensor_mapping.py
parent    5bf3953d7e9831ea22b0bc017ce97409b801ccf1 (diff)
llama : add PLaMo model (#3557)
* add plamo mock
* add tensor loading
* plamo convert
* update norm
* able to compile
* fix norm_rms_eps hparam
* runnable
* use inp_pos
* seems ok
* update kqv code
* remove develop code
* update README
* shuffle attn_q.weight and attn_output.weight for broadcasting
* remove plamo_llm_build_kqv and use llm_build_kqv
* fix style
* update
* llama : remove obsolete KQ_scale
* plamo : fix tensor names for correct GPU offload

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py/gguf/tensor_mapping.py')
-rw-r--r--    gguf-py/gguf/tensor_mapping.py    37
1 file changed, 23 insertions, 14 deletions
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 6fcbdbc1..446c6b68 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -79,6 +79,7 @@ class TensorNameMap:
"language_model.encoder.layers.{bid}.input_layernorm", # persimmon
"model.layers.{bid}.ln1", # yi
"transformer.h.{bid}.ln", # phi2
+ "model.layers.layers.{bid}.norm", # plamo
),
# Attention norm 2
@@ -99,26 +100,29 @@ class TensorNameMap:
# Attention query
MODEL_TENSOR.ATTN_Q: (
- "model.layers.{bid}.self_attn.q_proj", # llama-hf
- "layers.{bid}.attention.wq", # llama-pth
- "encoder.layer.{bid}.attention.self.query", # bert
- "transformer.h.{bid}.attn.q_proj", # gpt-j
+ "model.layers.{bid}.self_attn.q_proj", # llama-hf
+ "layers.{bid}.attention.wq", # llama-pth
+ "encoder.layer.{bid}.attention.self.query", # bert
+ "transformer.h.{bid}.attn.q_proj", # gpt-j
+ "model.layers.layers.{bid}.self_attn.q_proj", # plamo
),
# Attention key
MODEL_TENSOR.ATTN_K: (
- "model.layers.{bid}.self_attn.k_proj", # llama-hf
- "layers.{bid}.attention.wk", # llama-pth
- "encoder.layer.{bid}.attention.self.key", # bert
- "transformer.h.{bid}.attn.k_proj", # gpt-j
+ "model.layers.{bid}.self_attn.k_proj", # llama-hf
+ "layers.{bid}.attention.wk", # llama-pth
+ "encoder.layer.{bid}.attention.self.key", # bert
+ "transformer.h.{bid}.attn.k_proj", # gpt-j
+ "model.layers.layers.{bid}.self_attn.k_proj", # plamo
),
# Attention value
MODEL_TENSOR.ATTN_V: (
- "model.layers.{bid}.self_attn.v_proj", # llama-hf
- "layers.{bid}.attention.wv", # llama-pth
- "encoder.layer.{bid}.attention.self.value", # bert
- "transformer.h.{bid}.attn.v_proj", # gpt-j
+ "model.layers.{bid}.self_attn.v_proj", # llama-hf
+ "layers.{bid}.attention.wv", # llama-pth
+ "encoder.layer.{bid}.attention.self.value", # bert
+ "transformer.h.{bid}.attn.v_proj", # gpt-j
+ "model.layers.layers.{bid}.self_attn.v_proj", # plamo
),
# Attention output
@@ -134,12 +138,14 @@ class TensorNameMap:
"transformer.h.{bid}.attn.out_proj", # gpt-j
"language_model.encoder.layers.{bid}.self_attention.dense", # persimmon
"transformer.h.{bid}.mixer.out_proj", # phi2
+ "model.layers.layers.{bid}.self_attn.o_proj", # plamo
),
# Rotary embeddings
MODEL_TENSOR.ATTN_ROT_EMBD: (
- "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
- "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
+ "model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
+ "layers.{bid}.attention.inner_attention.rope.freqs", # llama-pth
+ "model.layers.layers.{bid}.self_attn.rotary_emb.inv_freq", # plamo
),
# Feed-forward norm
@@ -174,6 +180,7 @@ class TensorNameMap:
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
"transformer.h.{bid}.mlp.w1", # qwen
"transformer.h.{bid}.mlp.fc1", # phi2
+ "model.layers.layers.{bid}.mlp.up_proj", # plamo
),
MODEL_TENSOR.FFN_UP_EXP: (
@@ -186,6 +193,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
"layers.{bid}.feed_forward.w1", # llama-pth
"transformer.h.{bid}.mlp.w2", # qwen
+ "model.layers.layers.{bid}.mlp.gate_proj", # plamo
),
MODEL_TENSOR.FFN_GATE_EXP: (
@@ -206,6 +214,7 @@ class TensorNameMap:
"transformer.h.{bid}.mlp.fc_out", # gpt-j
"language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon
"transformer.h.{bid}.mlp.fc2", # phi2
+ "model.layers.layers.{bid}.mlp.down_proj", # plamo
),
MODEL_TENSOR.FFN_DOWN_EXP: (
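
The entries added above let the gguf-py conversion tooling translate PLaMo's checkpoint tensor names (note the doubled "model.layers.layers.{bid}" prefix) into canonical GGUF names. A minimal usage sketch follows, assuming MODEL_ARCH.PLAMO is registered in constants.py by this same commit; the block count of 40 is purely illustrative:

import gguf

# Build the source-name -> GGUF-name map for PLaMo (40 is an example block count).
tensor_map = gguf.get_tensor_name_map(gguf.MODEL_ARCH.PLAMO, 40)

# PLaMo checkpoints use the doubled "model.layers.layers.{bid}" prefix.
name = tensor_map.get_name("model.layers.layers.0.self_attn.q_proj.weight",
                           try_suffixes=(".weight", ".bias"))
print(name)  # expected: "blk.0.attn_q.weight"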