llama : add support for Nomic Embed (#5468)

author: Jared Van Bortel <jared@nomic.ai> 2024-02-13 12:03:53 -0500
committer: GitHub <noreply@github.com> 2024-02-13 12:03:53 -0500
commit: ea9c8e11436ad50719987fa23a289c74b7b40d40 (patch)
tree: be96ed8c8de113399a43b4017030a5ed1e1e8e3b /gguf-py
parent: c4e6dd59e45ef7b14f7763fb073b517395dc176c (diff)
2 files changed, 46 insertions, 22 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 644e1589..5fba0171 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -87,27 +87,28 @@ class Keys:
 
 
 class MODEL_ARCH(IntEnum):
-    LLAMA     = auto()
-    FALCON    = auto()
-    BAICHUAN  = auto()
-    GPT2      = auto()
-    GPTJ      = auto()
-    GPTNEOX   = auto()
-    MPT       = auto()
-    STARCODER = auto()
-    PERSIMMON = auto()
-    REFACT    = auto()
-    BERT      = auto()
-    BLOOM     = auto()
-    STABLELM  = auto()
-    QWEN      = auto()
-    QWEN2     = auto()
-    PHI2      = auto()
-    PLAMO     = auto()
-    CODESHELL = auto()
-    ORION     = auto()
+    LLAMA      = auto()
+    FALCON     = auto()
+    BAICHUAN   = auto()
+    GPT2       = auto()
+    GPTJ       = auto()
+    GPTNEOX    = auto()
+    MPT        = auto()
+    STARCODER  = auto()
+    PERSIMMON  = auto()
+    REFACT     = auto()
+    BERT       = auto()
+    NOMIC_BERT = auto()
+    BLOOM      = auto()
+    STABLELM   = auto()
+    QWEN       = auto()
+    QWEN2      = auto()
+    PHI2       = auto()
+    PLAMO      = auto()
+    CODESHELL  = auto()
+    ORION      = auto()
     INTERNLM2  = auto()
-    MINICPM   = auto()
+    MINICPM    = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -153,6 +154,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.PERSIMMON:      "persimmon",
     MODEL_ARCH.REFACT:         "refact",
     MODEL_ARCH.BERT:           "bert",
+    MODEL_ARCH.NOMIC_BERT:     "nomic-bert",
     MODEL_ARCH.BLOOM:          "bloom",
     MODEL_ARCH.STABLELM:       "stablelm",
     MODEL_ARCH.QWEN:           "qwen",
@@ -282,6 +284,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
         MODEL_TENSOR.FFN_UP,
         MODEL_TENSOR.LAYER_OUT_NORM,
     ],
+    MODEL_ARCH.NOMIC_BERT: [
+        MODEL_TENSOR.TOKEN_EMBD,
+        MODEL_TENSOR.TOKEN_EMBD_NORM,
+        MODEL_TENSOR.TOKEN_TYPES,
+        MODEL_TENSOR.POS_EMBD,
+        MODEL_TENSOR.OUTPUT_NORM,
+        MODEL_TENSOR.ATTN_OUT_NORM,
+        MODEL_TENSOR.ATTN_QKV,
+        MODEL_TENSOR.ATTN_OUT,
+        MODEL_TENSOR.FFN_GATE,
+        MODEL_TENSOR.FFN_DOWN,
+        MODEL_TENSOR.FFN_UP,
+        MODEL_TENSOR.LAYER_OUT_NORM,
+    ],
     MODEL_ARCH.MPT: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.OUTPUT_NORM,
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index c7ba1420..86100377 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -15,7 +15,7 @@ class TensorNameMap:
             "word_embeddings",                           # bloom
             "model.embed_tokens",                        # llama-hf
             "tok_embeddings",                            # llama-pth
-            "embeddings.word_embeddings",                # bert
+            "embeddings.word_embeddings",                # bert nomic-bert
             "language_model.embedding.word_embeddings",  # persimmon
             "wte",                                       # gpt2
             "transformer.embd.wte",                      # phi2
@@ -24,13 +24,14 @@ class TensorNameMap:
 
         # Token type embeddings
         MODEL_TENSOR.TOKEN_TYPES: (
-            "embeddings.token_type_embeddings",  # bert
+            "embeddings.token_type_embeddings",  # bert nomic-bert
         ),
 
         # Normalization of token embeddings
         MODEL_TENSOR.TOKEN_EMBD_NORM: (
             "word_embeddings_layernorm",  # bloom
             "embeddings.LayerNorm",       # bert
+            "emb_ln",                     # nomic-bert
         ),
 
         # Position embeddings
@@ -103,6 +104,7 @@ class TensorNameMap:
             "model.layers.{bid}.self_attn.query_key_value",                        # persimmon
             "h.{bid}.attn.c_attn",                                                 # gpt2
             "transformer.h.{bid}.mixer.Wqkv",                                      # phi2
+            "encoder.layers.{bid}.attn.Wqkv",                                      # nomic-bert
         ),
 
         # Attention query
@@ -152,11 +154,13 @@ class TensorNameMap:
             "transformer.h.{bid}.mixer.out_proj",                        # phi2
             "model.layers.layers.{bid}.self_attn.o_proj",                # plamo
             "model.layers.{bid}.attention.wo",                           # internlm2
+            "encoder.layers.{bid}.attn.out_proj",                        # nomic-bert
         ),
 
         # Attention output norm
         MODEL_TENSOR.ATTN_OUT_NORM: (
             "encoder.layer.{bid}.attention.output.LayerNorm",  # bert
+            "encoder.layers.{bid}.norm1",                      # nomic-bert
         ),
 
         # Rotary embeddings
@@ -205,6 +209,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.fc1",                             # phi2
             "model.layers.layers.{bid}.mlp.up_proj",                  # plamo
             "model.layers.{bid}.feed_forward.w3",                     # internlm2
+            "encoder.layers.{bid}.mlp.fc11",                          # nomic-bert
         ),
 
         MODEL_TENSOR.FFN_UP_EXP: (
@@ -224,6 +229,7 @@ class TensorNameMap:
             "transformer.h.{bid}.mlp.w2",                 # qwen
             "model.layers.layers.{bid}.mlp.gate_proj",    # plamo
             "model.layers.{bid}.feed_forward.w1",         # internlm2
+            "encoder.layers.{bid}.mlp.fc12",              # nomic-bert
         ),
 
         MODEL_TENSOR.FFN_GATE_EXP: (
@@ -249,6 +255,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.fc2",                             # phi2
             "model.layers.layers.{bid}.mlp.down_proj",                # plamo
             "model.layers.{bid}.feed_forward.w2",                     # internlm2
+            "encoder.layers.{bid}.mlp.fc2",                           # nomic-bert
         ),
 
         MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -272,6 +279,7 @@ class TensorNameMap:
 
         MODEL_TENSOR.LAYER_OUT_NORM: (
             "encoder.layer.{bid}.output.LayerNorm",  # bert
+            "encoder.layers.{bid}.norm2",            # nomic-bert
         )
     }
author	Jared Van Bortel <jared@nomic.ai>	2024-02-13 12:03:53 -0500
committer	GitHub <noreply@github.com>	2024-02-13 12:03:53 -0500
commit	ea9c8e11436ad50719987fa23a289c74b7b40d40 (patch)
tree	be96ed8c8de113399a43b4017030a5ed1e1e8e3b /gguf-py
parent	c4e6dd59e45ef7b14f7763fb073b517395dc176c (diff)