diff options
author | Jared Van Bortel <jared@nomic.ai> | 2024-02-13 12:03:53 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-13 12:03:53 -0500 |
commit | ea9c8e11436ad50719987fa23a289c74b7b40d40 (patch) | |
tree | be96ed8c8de113399a43b4017030a5ed1e1e8e3b /gguf-py | |
parent | c4e6dd59e45ef7b14f7763fb073b517395dc176c (diff) |
llama : add support for Nomic Embed (#5468)
Diffstat (limited to 'gguf-py')
-rw-r--r-- | gguf-py/gguf/constants.py | 56 | ||||
-rw-r--r-- | gguf-py/gguf/tensor_mapping.py | 12 |
2 files changed, 46 insertions, 22 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 644e1589..5fba0171 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -87,27 +87,28 @@ class Keys: class MODEL_ARCH(IntEnum): - LLAMA = auto() - FALCON = auto() - BAICHUAN = auto() - GPT2 = auto() - GPTJ = auto() - GPTNEOX = auto() - MPT = auto() - STARCODER = auto() - PERSIMMON = auto() - REFACT = auto() - BERT = auto() - BLOOM = auto() - STABLELM = auto() - QWEN = auto() - QWEN2 = auto() - PHI2 = auto() - PLAMO = auto() - CODESHELL = auto() - ORION = auto() + LLAMA = auto() + FALCON = auto() + BAICHUAN = auto() + GPT2 = auto() + GPTJ = auto() + GPTNEOX = auto() + MPT = auto() + STARCODER = auto() + PERSIMMON = auto() + REFACT = auto() + BERT = auto() + NOMIC_BERT = auto() + BLOOM = auto() + STABLELM = auto() + QWEN = auto() + QWEN2 = auto() + PHI2 = auto() + PLAMO = auto() + CODESHELL = auto() + ORION = auto() INTERNLM2 = auto() - MINICPM = auto() + MINICPM = auto() class MODEL_TENSOR(IntEnum): @@ -153,6 +154,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { MODEL_ARCH.PERSIMMON: "persimmon", MODEL_ARCH.REFACT: "refact", MODEL_ARCH.BERT: "bert", + MODEL_ARCH.NOMIC_BERT: "nomic-bert", MODEL_ARCH.BLOOM: "bloom", MODEL_ARCH.STABLELM: "stablelm", MODEL_ARCH.QWEN: "qwen", @@ -282,6 +284,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_TENSOR.FFN_UP, MODEL_TENSOR.LAYER_OUT_NORM, ], + MODEL_ARCH.NOMIC_BERT: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.TOKEN_EMBD_NORM, + MODEL_TENSOR.TOKEN_TYPES, + MODEL_TENSOR.POS_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.ATTN_OUT_NORM, + MODEL_TENSOR.ATTN_QKV, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.LAYER_OUT_NORM, + ], MODEL_ARCH.MPT: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT_NORM, diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index c7ba1420..86100377 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -15,7 +15,7 @@ class TensorNameMap: "word_embeddings", # bloom "model.embed_tokens", # llama-hf "tok_embeddings", # llama-pth - "embeddings.word_embeddings", # bert + "embeddings.word_embeddings", # bert nomic-bert "language_model.embedding.word_embeddings", # persimmon "wte", # gpt2 "transformer.embd.wte", # phi2 @@ -24,13 +24,14 @@ class TensorNameMap: # Token type embeddings MODEL_TENSOR.TOKEN_TYPES: ( - "embeddings.token_type_embeddings", # bert + "embeddings.token_type_embeddings", # bert nomic-bert ), # Normalization of token embeddings MODEL_TENSOR.TOKEN_EMBD_NORM: ( "word_embeddings_layernorm", # bloom "embeddings.LayerNorm", # bert + "emb_ln", # nomic-bert ), # Position embeddings @@ -103,6 +104,7 @@ class TensorNameMap: "model.layers.{bid}.self_attn.query_key_value", # persimmon "h.{bid}.attn.c_attn", # gpt2 "transformer.h.{bid}.mixer.Wqkv", # phi2 + "encoder.layers.{bid}.attn.Wqkv", # nomic-bert ), # Attention query @@ -152,11 +154,13 @@ class TensorNameMap: "transformer.h.{bid}.mixer.out_proj", # phi2 "model.layers.layers.{bid}.self_attn.o_proj", # plamo "model.layers.{bid}.attention.wo", # internlm2 + "encoder.layers.{bid}.attn.out_proj", # nomic-bert ), # Attention output norm MODEL_TENSOR.ATTN_OUT_NORM: ( "encoder.layer.{bid}.attention.output.LayerNorm", # bert + "encoder.layers.{bid}.norm1", # nomic-bert ), # Rotary embeddings @@ -205,6 +209,7 @@ class TensorNameMap: "model.layers.{bid}.mlp.fc1", # phi2 "model.layers.layers.{bid}.mlp.up_proj", # plamo "model.layers.{bid}.feed_forward.w3", # internlm2 + "encoder.layers.{bid}.mlp.fc11", # nomic-bert ), MODEL_TENSOR.FFN_UP_EXP: ( @@ -224,6 +229,7 @@ class TensorNameMap: "transformer.h.{bid}.mlp.w2", # qwen "model.layers.layers.{bid}.mlp.gate_proj", # plamo "model.layers.{bid}.feed_forward.w1", # internlm2 + "encoder.layers.{bid}.mlp.fc12", # nomic-bert ), MODEL_TENSOR.FFN_GATE_EXP: ( @@ -249,6 +255,7 @@ class TensorNameMap: "model.layers.{bid}.mlp.fc2", # phi2 "model.layers.layers.{bid}.mlp.down_proj", # plamo "model.layers.{bid}.feed_forward.w2", # internlm2 + "encoder.layers.{bid}.mlp.fc2", # nomic-bert ), MODEL_TENSOR.FFN_DOWN_EXP: ( @@ -272,6 +279,7 @@ class TensorNameMap: MODEL_TENSOR.LAYER_OUT_NORM: ( "encoder.layer.{bid}.output.LayerNorm", # bert + "encoder.layers.{bid}.norm2", # nomic-bert ) } |