summaryrefslogtreecommitdiff
path: root/gguf-py/gguf/tensor_mapping.py
diff options
context:
space:
mode:
Diffstat (limited to 'gguf-py/gguf/tensor_mapping.py')
-rw-r--r--gguf-py/gguf/tensor_mapping.py4
1 files changed, 4 insertions, 0 deletions
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index e5750d41..990fe63c 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -137,6 +137,7 @@ class TensorNameMap:
"layers.{bid}.attention.wk", # llama-pth
"encoder.layer.{bid}.attention.self.key", # bert
"transformer.h.{bid}.attn.k_proj", # gpt-j
+ "transformer.h.{bid}.attn.k", # refact
"model.layers.layers.{bid}.self_attn.k_proj", # plamo
"model.layers.{bid}.attention.wk", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.key" # Grok
@@ -148,6 +149,7 @@ class TensorNameMap:
"layers.{bid}.attention.wv", # llama-pth
"encoder.layer.{bid}.attention.self.value", # bert
"transformer.h.{bid}.attn.v_proj", # gpt-j
+ "transformer.h.{bid}.attn.v", # refact
"model.layers.layers.{bid}.self_attn.v_proj", # plamo
"model.layers.{bid}.attention.wv", # internlm2
"transformer.decoder_layer.{bid}.multi_head_attention.value" # Grok
@@ -229,6 +231,7 @@ class TensorNameMap:
"layers.{bid}.feed_forward.w3", # llama-pth
"encoder.layer.{bid}.intermediate.dense", # bert
"transformer.h.{bid}.mlp.fc_in", # gpt-j
+ "transformer.h.{bid}.mlp.linear_3", # refact
"language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon
"model.layers.{bid}.mlp.dense_h_to_4h", # persimmon
"transformer.h.{bid}.mlp.w1", # qwen
@@ -266,6 +269,7 @@ class TensorNameMap:
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
"model.layers.{bid}.feed_forward.w1", # internlm2
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
+ "transformer.h.{bid}.mlp.linear_1", # refact
),
MODEL_TENSOR.FFN_GATE_EXP: (