author    Kawrakow <48489457+ikawrakow@users.noreply.github.com>    2024-07-27 07:55:01 +0200
committer GitHub <noreply@github.com>                               2024-07-27 07:55:01 +0200
commit    154e0d75fccf1784fe9ff6fd76a630b66563da3d (patch)
tree      81ce6dbb5b1900c1aa78a879f0593c694cab9d27 /gguf-py/gguf/tensor_mapping.py
parent    0684c3e9c70d49323b4fc517128cbe222cab7f96 (diff)
Merge mainline llama.cpp (#3)
* Merging mainline - WIP

* Merging mainline - WIP

AVX2 and CUDA appear to work. CUDA performance seems slightly (~1-2%) lower, as is so often the case with llama.cpp/ggml after some "improvements" have been made.

* Merging mainline - fix Metal

* Remove check

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'gguf-py/gguf/tensor_mapping.py')
-rw-r--r--  gguf-py/gguf/tensor_mapping.py  192
1 file changed, 170 insertions(+), 22 deletions(-)
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 350035bd..9aa2209e 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -10,7 +10,7 @@ class TensorNameMap:
# Token embeddings
MODEL_TENSOR.TOKEN_EMBD: (
"gpt_neox.embed_in", # gptneox
- "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx
+ "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais
"transformer.word_embeddings", # falcon
"word_embeddings", # bloom
"model.embed_tokens", # llama-hf
@@ -24,6 +24,9 @@ class TensorNameMap:
"backbone.embedding", # mamba
"backbone.embeddings", # mamba-hf
"transformer.in_out_embed", # Grok
+ "embedding.word_embeddings", # chatglm
+ "transformer.token_embeddings", # openelm
+ "shared", # t5
),
# Token type embeddings
@@ -36,6 +39,7 @@ class TensorNameMap:
"word_embeddings_layernorm", # bloom
"embeddings.LayerNorm", # bert
"emb_ln", # nomic-bert
+ "transformer.norm", # openelm
),
# Position embeddings
@@ -48,16 +52,17 @@ class TensorNameMap:
# Output
MODEL_TENSOR.OUTPUT: (
"embed_out", # gptneox
- "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx
+ "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais
"output", # llama-pth bloom internlm2
"word_embeddings_for_head", # persimmon
"lm_head.linear", # phi2
+ "output_layer", # chatglm
),
# Output norm
MODEL_TENSOR.OUTPUT_NORM: (
"gpt_neox.final_layer_norm", # gptneox
- "transformer.ln_f", # gpt2 gpt-j falcon
+ "transformer.ln_f", # gpt2 gpt-j falcon jais
"model.norm", # llama-hf baichuan internlm2
"norm", # llama-pth
"transformer.norm_f", # mpt dbrx
@@ -68,11 +73,14 @@ class TensorNameMap:
"model.norm_f", # mamba-qbert
"backbone.norm_f", # mamba
"transformer.rms_norm", # Grok
+ "encoder.final_layernorm", # chatglm
+ "transformer.norm", # openelm
),
# Rope frequencies
MODEL_TENSOR.ROPE_FREQS: (
"rope.freqs", # llama-pth
+ "rotary_pos_emb.inv_freq", # chatglm
),
}
@@ -80,7 +88,7 @@ class TensorNameMap:
# Attention norm
MODEL_TENSOR.ATTN_NORM: (
"gpt_neox.layers.{bid}.input_layernorm", # gptneox
- "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen
+ "transformer.h.{bid}.ln_1", # gpt2 gpt-j refact qwen jais
"transformer.blocks.{bid}.norm_1", # mpt
"transformer.h.{bid}.input_layernorm", # falcon7b
"h.{bid}.input_layernorm", # bloom
@@ -97,6 +105,8 @@ class TensorNameMap:
"backbone.layers.{bid}.norm", # mamba
"transformer.decoder_layer.{bid}.rms_norm", # Grok
"transformer.blocks.{bid}.norm_attn_norm.norm_1", # dbrx
+ "encoder.layers.{bid}.input_layernorm", # chatglm
+ "transformer.layers.{bid}.attn_norm", # openelm
),
# Attention norm 2
@@ -108,7 +118,7 @@ class TensorNameMap:
# Attention query-key-value
MODEL_TENSOR.ATTN_QKV: (
"gpt_neox.layers.{bid}.attention.query_key_value", # gptneox
- "transformer.h.{bid}.attn.c_attn", # gpt2 qwen
+ "transformer.h.{bid}.attn.c_attn", # gpt2 qwen jais
"transformer.blocks.{bid}.attn.Wqkv", # mpt
"transformer.blocks.{bid}.norm_attn_norm.attn.Wqkv", # dbrx
"transformer.h.{bid}.self_attention.query_key_value", # falcon
@@ -118,7 +128,9 @@ class TensorNameMap:
"h.{bid}.attn.c_attn", # gpt2
"transformer.h.{bid}.mixer.Wqkv", # phi2
"encoder.layers.{bid}.attn.Wqkv", # nomic-bert
- "model.layers.{bid}.self_attn.qkv_proj" # phi3
+ "model.layers.{bid}.self_attn.qkv_proj", # phi3
+ "encoder.layers.{bid}.self_attention.query_key_value", # chatglm
+ "transformer.layers.{bid}.attn.qkv_proj", # openelm
),
# Attention query
@@ -129,7 +141,7 @@ class TensorNameMap:
"transformer.h.{bid}.attn.q_proj", # gpt-j
"model.layers.layers.{bid}.self_attn.q_proj", # plamo
"model.layers.{bid}.attention.wq", # internlm2
- "transformer.decoder_layer.{bid}.multi_head_attention.query" # Grok
+ "transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
),
# Attention key
@@ -141,7 +153,7 @@ class TensorNameMap:
"transformer.h.{bid}.attn.k", # refact
"model.layers.layers.{bid}.self_attn.k_proj", # plamo
"model.layers.{bid}.attention.wk", # internlm2
- "transformer.decoder_layer.{bid}.multi_head_attention.key" # Grok
+ "transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
),
# Attention value
@@ -159,7 +171,7 @@ class TensorNameMap:
# Attention output
MODEL_TENSOR.ATTN_OUT: (
"gpt_neox.layers.{bid}.attention.dense", # gptneox
- "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen
+ "transformer.h.{bid}.attn.c_proj", # gpt2 refact qwen jais
"transformer.blocks.{bid}.attn.out_proj", # mpt
"transformer.h.{bid}.self_attention.dense", # falcon
"h.{bid}.self_attention.dense", # bloom
@@ -176,6 +188,8 @@ class TensorNameMap:
"encoder.layers.{bid}.attn.out_proj", # nomic-bert
"transformer.decoder_layer.{bid}.multi_head_attention.linear", # Grok
"transformer.blocks.{bid}.norm_attn_norm.attn.out_proj", # dbrx
+ "encoder.layers.{bid}.self_attention.dense", # chatglm
+ "transformer.layers.{bid}.attn.out_proj", # openelm
),
# Attention output norm
@@ -186,6 +200,10 @@ class TensorNameMap:
"transformer.blocks.{bid}.norm_attn_norm.norm_2", # dbrx
),
+ MODEL_TENSOR.ATTN_POST_NORM: (
+ "model.layers.{bid}.post_attention_layernorm", # gemma2
+ ),
+
# Rotary embeddings
MODEL_TENSOR.ATTN_ROT_EMBD: (
"model.layers.{bid}.self_attn.rotary_emb.inv_freq", # llama-hf
@@ -197,7 +215,7 @@ class TensorNameMap:
# Feed-forward norm
MODEL_TENSOR.FFN_NORM: (
"gpt_neox.layers.{bid}.post_attention_layernorm", # gptneox
- "transformer.h.{bid}.ln_2", # gpt2 refact qwen
+ "transformer.h.{bid}.ln_2", # gpt2 refact qwen jais
"h.{bid}.post_attention_layernorm", # bloom
"transformer.blocks.{bid}.norm_2", # mpt
"model.layers.{bid}.post_attention_layernorm", # llama-hf
@@ -207,6 +225,18 @@ class TensorNameMap:
"h.{bid}.ln_2", # gpt2
"model.layers.{bid}.ffn_norm", # internlm2
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
+ "encoder.layers.{bid}.post_attention_layernorm", # chatglm
+ "transformer.layers.{bid}.ffn_norm", # openelm
+ ),
+
+ # Pre feed-forward norm
+ MODEL_TENSOR.FFN_PRE_NORM: (
+ "model.layers.{bid}.pre_feedforward_layernorm", # gemma2
+ ),
+
+ # Post feed-forward norm
+ MODEL_TENSOR.FFN_POST_NORM: (
+ "model.layers.{bid}.post_feedforward_layernorm", # gemma2
),
MODEL_TENSOR.FFN_GATE_INP: (
@@ -224,7 +254,7 @@ class TensorNameMap:
# Feed-forward up
MODEL_TENSOR.FFN_UP: (
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox
- "transformer.h.{bid}.mlp.c_fc", # gpt2
+ "transformer.h.{bid}.mlp.c_fc", # gpt2 jais
"transformer.blocks.{bid}.ffn.up_proj", # mpt
"transformer.h.{bid}.mlp.dense_h_to_4h", # falcon
"h.{bid}.mlp.dense_h_to_4h", # bloom
@@ -246,6 +276,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.c_fc", # starcoder2
"encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
"model.layers.{bid}.residual_mlp.w3", # arctic
+ "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
),
MODEL_TENSOR.FFN_UP_EXP: (
@@ -270,6 +301,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.gate_proj", # llama-hf refact
"layers.{bid}.feed_forward.w1", # llama-pth
"transformer.h.{bid}.mlp.w2", # qwen
+ "transformer.h.{bid}.mlp.c_fc2", # jais
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
"model.layers.{bid}.feed_forward.w1", # internlm2
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
@@ -293,7 +325,7 @@ class TensorNameMap:
# Feed-forward down
MODEL_TENSOR.FFN_DOWN: (
"gpt_neox.layers.{bid}.mlp.dense_4h_to_h", # gptneox
- "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen
+ "transformer.h.{bid}.mlp.c_proj", # gpt2 refact qwen jais
"transformer.blocks.{bid}.ffn.down_proj", # mpt
"transformer.h.{bid}.mlp.dense_4h_to_h", # falcon
"h.{bid}.mlp.dense_4h_to_h", # bloom
@@ -311,8 +343,10 @@ class TensorNameMap:
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
"model.layers.{bid}.mlp.c_proj", # starcoder2
"encoder.layer.{bid}.mlp.wo", # jina-bert-v2
+ "transformer.layers.{bid}.ffn.proj_2", # openelm
"model.layers.{bid}.residual_mlp.w2", # arctic
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
+ "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
),
MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -332,7 +366,8 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
"model.layers.{bid}.self_attn.q_norm", # cohere
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
- "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
+ "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2
+ "transformer.layers.{bid}.attn.q_norm", # openelm
),
MODEL_TENSOR.ATTN_K_NORM: (
@@ -340,7 +375,8 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
"model.layers.{bid}.self_attn.k_norm", # cohere
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
- "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
+ "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2
+ "transformer.layers.{bid}.attn.k_norm", # openelm
),
MODEL_TENSOR.ROPE_FREQS: (
@@ -421,6 +457,120 @@ class TensorNameMap:
MODEL_TENSOR.FFN_SUB_NORM: (
"model.layers.{bid}.mlp.ffn_layernorm", # bitnet
),
+
+ MODEL_TENSOR.DEC_ATTN_NORM: (
+ "decoder.block.{bid}.layer.0.layer_norm", # t5
+ ),
+
+ MODEL_TENSOR.DEC_ATTN_Q: (
+ "decoder.block.{bid}.layer.0.SelfAttention.q", # t5
+ ),
+
+ MODEL_TENSOR.DEC_ATTN_K: (
+ "decoder.block.{bid}.layer.0.SelfAttention.k", # t5
+ ),
+
+ MODEL_TENSOR.DEC_ATTN_V: (
+ "decoder.block.{bid}.layer.0.SelfAttention.v", # t5
+ ),
+
+ MODEL_TENSOR.DEC_ATTN_OUT: (
+ "decoder.block.{bid}.layer.0.SelfAttention.o", # t5
+ ),
+
+ MODEL_TENSOR.DEC_ATTN_REL_B: (
+ "decoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
+ ),
+
+ MODEL_TENSOR.DEC_CROSS_ATTN_NORM: (
+ "decoder.block.{bid}.layer.1.layer_norm", # t5
+ ),
+
+ MODEL_TENSOR.DEC_CROSS_ATTN_Q: (
+ "decoder.block.{bid}.layer.1.EncDecAttention.q", # t5
+ ),
+
+ MODEL_TENSOR.DEC_CROSS_ATTN_K: (
+ "decoder.block.{bid}.layer.1.EncDecAttention.k", # t5
+ ),
+
+ MODEL_TENSOR.DEC_CROSS_ATTN_V: (
+ "decoder.block.{bid}.layer.1.EncDecAttention.v", # t5
+ ),
+
+ MODEL_TENSOR.DEC_CROSS_ATTN_OUT: (
+ "decoder.block.{bid}.layer.1.EncDecAttention.o", # t5
+ ),
+
+ MODEL_TENSOR.DEC_CROSS_ATTN_REL_B: (
+ "decoder.block.{bid}.layer.1.EncDecAttention.relative_attention_bias", # t5
+ ),
+
+ MODEL_TENSOR.DEC_FFN_NORM: (
+ "decoder.block.{bid}.layer.2.layer_norm", # t5
+ ),
+
+ MODEL_TENSOR.DEC_FFN_GATE: (
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi_0", # flan-t5
+ ),
+
+ MODEL_TENSOR.DEC_FFN_UP: (
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi", # t5
+ "decoder.block.{bid}.layer.2.DenseReluDense.wi_1", # flan-t5
+ ),
+
+ MODEL_TENSOR.DEC_FFN_DOWN: (
+ "decoder.block.{bid}.layer.2.DenseReluDense.wo", # t5
+ ),
+
+ MODEL_TENSOR.DEC_OUTPUT_NORM: (
+ "decoder.final_layer_norm", # t5
+ ),
+
+ MODEL_TENSOR.ENC_ATTN_NORM: (
+ "encoder.block.{bid}.layer.0.layer_norm", # t5
+ ),
+
+ MODEL_TENSOR.ENC_ATTN_Q: (
+ "encoder.block.{bid}.layer.0.SelfAttention.q", # t5
+ ),
+
+ MODEL_TENSOR.ENC_ATTN_K: (
+ "encoder.block.{bid}.layer.0.SelfAttention.k", # t5
+ ),
+
+ MODEL_TENSOR.ENC_ATTN_V: (
+ "encoder.block.{bid}.layer.0.SelfAttention.v", # t5
+ ),
+
+ MODEL_TENSOR.ENC_ATTN_OUT: (
+ "encoder.block.{bid}.layer.0.SelfAttention.o", # t5
+ ),
+
+ MODEL_TENSOR.ENC_ATTN_REL_B: (
+ "encoder.block.{bid}.layer.0.SelfAttention.relative_attention_bias", # t5
+ ),
+
+ MODEL_TENSOR.ENC_FFN_NORM: (
+ "encoder.block.{bid}.layer.1.layer_norm", # t5
+ ),
+
+ MODEL_TENSOR.ENC_FFN_GATE: (
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi_0", # flan-t5
+ ),
+
+ MODEL_TENSOR.ENC_FFN_UP: (
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi", # t5
+ "encoder.block.{bid}.layer.1.DenseReluDense.wi_1", # flan-t5
+ ),
+
+ MODEL_TENSOR.ENC_FFN_DOWN: (
+ "encoder.block.{bid}.layer.1.DenseReluDense.wo", # t5
+ ),
+
+ MODEL_TENSOR.ENC_OUTPUT_NORM: (
+ "encoder.final_layer_norm", # t5
+ ),
}
# architecture-specific block mappings
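Note on usage: the hunk above extends the class-level block mappings with T5 encoder/decoder, ChatGLM, and OpenELM tensor names, all keyed by the block id placeholder {bid}. A rough sketch of how such an entry is resolved at conversion time (get_tensor_name_map and MODEL_ARCH exist in gguf-py; the block count and checkpoint tensor name below are illustrative assumptions):

    from gguf.constants import MODEL_ARCH
    from gguf.tensor_mapping import get_tensor_name_map

    # Build the name map for a hypothetical T5 model with 12 blocks.
    name_map = get_tensor_name_map(MODEL_ARCH.T5, 12)

    # A checkpoint tensor name resolves to the canonical GGUF name;
    # try_suffixes lets ".weight"/".bias" pass through unchanged.
    gguf_name = name_map.get_name("decoder.block.3.layer.0.SelfAttention.q.weight",
                                  try_suffixes=(".weight", ".bias"))
    # Expected to be something like "dec.blk.3.attn_q.weight"; the exact
    # GGUF-side name comes from gguf.constants.TENSOR_NAMES.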
@@ -452,14 +602,12 @@ class TensorNameMap:
for tensor, keys in self.block_mappings_cfg.items():
if tensor not in MODEL_TENSORS[arch]:
continue
- # TODO: make this configurable
- n_experts = 160
- for xid in range(n_experts):
- tensor_name = TENSOR_NAMES[tensor].format(bid = bid, xid = xid)
- self.mapping[tensor_name] = (tensor, tensor_name)
- for key in keys:
- key = key.format(bid = bid, xid = xid)
- self.mapping[key] = (tensor, tensor_name)
+
+ tensor_name = TENSOR_NAMES[tensor].format(bid = bid)
+ self.mapping[tensor_name] = (tensor, tensor_name)
+ for key in keys:
+ key = key.format(bid = bid)
+ self.mapping[key] = (tensor, tensor_name)
def get_type_and_name(self, key: str, try_suffixes: Sequence[str] = ()) -> tuple[MODEL_TENSOR, str] | None:
result = self.mapping.get(key)
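The last hunk drops the hard-coded n_experts inner loop: per-block keys are now formatted with the block id alone, and every alias points at the canonical GGUF tensor name. A standalone sketch of what the simplified loop builds (the dicts here are tiny stand-ins for gguf.constants.TENSOR_NAMES and the class-level block_mappings_cfg, not the real tables):

    # Stand-ins for the MODEL_TENSOR enum, TENSOR_NAMES, and block_mappings_cfg.
    TENSOR_NAMES = {"ATTN_Q": "blk.{bid}.attn_q"}
    block_mappings_cfg = {"ATTN_Q": ("model.layers.{bid}.self_attn.q_proj",   # llama-hf
                                     "transformer.h.{bid}.attn.q_proj")}      # gpt-j

    mapping: dict[str, tuple[str, str]] = {}
    for bid in range(32):  # n_blocks of the model being converted
        for tensor, keys in block_mappings_cfg.items():
            tensor_name = TENSOR_NAMES[tensor].format(bid=bid)
            mapping[tensor_name] = (tensor, tensor_name)              # canonical name maps to itself
            for key in keys:
                mapping[key.format(bid=bid)] = (tensor, tensor_name)  # each alias maps to it

    assert mapping["model.layers.7.self_attn.q_proj"] == ("ATTN_Q", "blk.7.attn_q")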