diff options
author | Ebey Abraham <ebey97@gmail.com> | 2023-12-18 17:27:47 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-18 19:27:47 +0200 |
commit | b9e74f9bca5fdf7d0a22ed25e7a9626335fdfa48 (patch) | |
tree | b150a0d4490627bfc9cdd758d08d026fc70b0882 /gguf-py/gguf/tensor_mapping.py | |
parent | 3c04bf6da89eaf4c7d317e0518f0687dfcbf2de7 (diff) |
llama : add phi-2 + fix NeoX rope + ggml_mul_mat_set_prec (#4490)
* phi2 implementation
* fix breaking change
* phi-2 : various fixes
* phi-2 : use layer norm eps
* py : whitespaces
* llama : fix meta KV override bug
* convert : phi don't add BOS token
* convert : revert "added_tokens_decoder" change
* phi-2 : scale Q instead of KQ for better precision
* ggml : fix NeoX rope to rotate just first n_dims
* cuda : less diff in the rope_neox kernel
* ggml : add ggml_mul_mat_set_prec
ggml-ci
* Update ggml-cuda.cu
Co-authored-by: slaren <slarengh@gmail.com>
* Update ggml-cuda.cu
Co-authored-by: slaren <slarengh@gmail.com>
* cuda : ggml_cuda_op_mul_mat_cublas support F32 precision
* cuda : remove oboslete comment
---------
Co-authored-by: Ebey Abraham <ebeyabraham@microsoft.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: slaren <slarengh@gmail.com>
Diffstat (limited to 'gguf-py/gguf/tensor_mapping.py')
-rw-r--r-- | gguf-py/gguf/tensor_mapping.py | 8 |
1 files changed, 8 insertions, 0 deletions
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 0115ea1c..6fcbdbc1 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -17,6 +17,7 @@ class TensorNameMap: "tok_embeddings", # llama-pth "embeddings.word_embeddings", # bert "language_model.embedding.word_embeddings", # persimmon + "transformer.embd.wte", # phi2 ), # Token type embeddings @@ -41,6 +42,7 @@ class TensorNameMap: "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen "output", # llama-pth bloom "word_embeddings_for_head", # persimmon + "lm_head.linear", # phi2 ), # Output norm @@ -53,6 +55,7 @@ class TensorNameMap: "transformer.norm_f", # mpt "ln_f", # refact bloom qwen "language_model.encoder.final_layernorm", # persimmon + "lm_head.ln", # phi2 ), # Rope frequencies @@ -75,6 +78,7 @@ class TensorNameMap: "encoder.layer.{bid}.attention.output.LayerNorm", # bert "language_model.encoder.layers.{bid}.input_layernorm", # persimmon "model.layers.{bid}.ln1", # yi + "transformer.h.{bid}.ln", # phi2 ), # Attention norm 2 @@ -90,6 +94,7 @@ class TensorNameMap: "transformer.h.{bid}.self_attention.query_key_value", # falcon "h.{bid}.self_attention.query_key_value", # bloom "language_model.encoder.layers.{bid}.self_attention.query_key_value", # persimmon + "transformer.h.{bid}.mixer.Wqkv", # phi2 ), # Attention query @@ -128,6 +133,7 @@ class TensorNameMap: "encoder.layer.{bid}.attention.output.dense", # bert "transformer.h.{bid}.attn.out_proj", # gpt-j "language_model.encoder.layers.{bid}.self_attention.dense", # persimmon + "transformer.h.{bid}.mixer.out_proj", # phi2 ), # Rotary embeddings @@ -167,6 +173,7 @@ class TensorNameMap: "transformer.h.{bid}.mlp.fc_in", # gpt-j "language_model.encoder.layers.{bid}.mlp.dense_h_to_4h", # persimmon "transformer.h.{bid}.mlp.w1", # qwen + "transformer.h.{bid}.mlp.fc1", # phi2 ), MODEL_TENSOR.FFN_UP_EXP: ( @@ -198,6 +205,7 @@ class TensorNameMap: "encoder.layer.{bid}.output.dense", # bert "transformer.h.{bid}.mlp.fc_out", # gpt-j "language_model.encoder.layers.{bid}.mlp.dense_4h_to_h", # persimmon + "transformer.h.{bid}.mlp.fc2", # phi2 ), MODEL_TENSOR.FFN_DOWN_EXP: ( |