summary | refs | log | tree | commit | diff
path: root/gguf-py
diff options
context:
space:
mode:
author Jared Van Bortel <jared@nomic.ai> 2024-02-13 12:03:53 -0500
committer GitHub <noreply@github.com> 2024-02-13 12:03:53 -0500
commit ea9c8e11436ad50719987fa23a289c74b7b40d40 (patch)
tree be96ed8c8de113399a43b4017030a5ed1e1e8e3b /gguf-py
parent c4e6dd59e45ef7b14f7763fb073b517395dc176c (diff)
llama : add support for Nomic Embed (#5468)
Diffstat (limited to 'gguf-py')
-rw-r--r-- gguf-py/gguf/constants.py 56
-rw-r--r-- gguf-py/gguf/tensor_mapping.py 12
2 files changed, 46 insertions, 22 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 644e1589..5fba0171 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -87,27 +87,28 @@ class Keys:
class MODEL_ARCH(IntEnum):
- LLAMA = auto()
- FALCON = auto()
- BAICHUAN = auto()
- GPT2 = auto()
- GPTJ = auto()
- GPTNEOX = auto()
- MPT = auto()
- STARCODER = auto()
- PERSIMMON = auto()
- REFACT = auto()
- BERT = auto()
- BLOOM = auto()
- STABLELM = auto()
- QWEN = auto()
- QWEN2 = auto()
- PHI2 = auto()
- PLAMO = auto()
- CODESHELL = auto()
- ORION = auto()
+ LLAMA = auto()
+ FALCON = auto()
+ BAICHUAN = auto()
+ GPT2 = auto()
+ GPTJ = auto()
+ GPTNEOX = auto()
+ MPT = auto()
+ STARCODER = auto()
+ PERSIMMON = auto()
+ REFACT = auto()
+ BERT = auto()
+ NOMIC_BERT = auto()
+ BLOOM = auto()
+ STABLELM = auto()
+ QWEN = auto()
+ QWEN2 = auto()
+ PHI2 = auto()
+ PLAMO = auto()
+ CODESHELL = auto()
+ ORION = auto()
INTERNLM2 = auto()
- MINICPM = auto()
+ MINICPM = auto()
class MODEL_TENSOR(IntEnum):
@@ -153,6 +154,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.PERSIMMON: "persimmon",
MODEL_ARCH.REFACT: "refact",
MODEL_ARCH.BERT: "bert",
+ MODEL_ARCH.NOMIC_BERT: "nomic-bert",
MODEL_ARCH.BLOOM: "bloom",
MODEL_ARCH.STABLELM: "stablelm",
MODEL_ARCH.QWEN: "qwen",
@@ -282,6 +284,20 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.LAYER_OUT_NORM,
],
+ MODEL_ARCH.NOMIC_BERT: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
+ MODEL_TENSOR.TOKEN_TYPES,
+ MODEL_TENSOR.POS_EMBD,
+ MODEL_TENSOR.OUTPUT_NORM,
+ MODEL_TENSOR.ATTN_OUT_NORM,
+ MODEL_TENSOR.ATTN_QKV,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.FFN_GATE,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.FFN_UP,
+ MODEL_TENSOR.LAYER_OUT_NORM,
+ ],
MODEL_ARCH.MPT: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index c7ba1420..86100377 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -15,7 +15,7 @@ class TensorNameMap:
"word_embeddings", # bloom
"model.embed_tokens", # llama-hf
"tok_embeddings", # llama-pth
- "embeddings.word_embeddings", # bert
+ "embeddings.word_embeddings", # bert nomic-bert
"language_model.embedding.word_embeddings", # persimmon
"wte", # gpt2
"transformer.embd.wte", # phi2
@@ -24,13 +24,14 @@ class TensorNameMap:
# Token type embeddings
MODEL_TENSOR.TOKEN_TYPES: (
- "embeddings.token_type_embeddings", # bert
+ "embeddings.token_type_embeddings", # bert nomic-bert
),
# Normalization of token embeddings
MODEL_TENSOR.TOKEN_EMBD_NORM: (
"word_embeddings_layernorm", # bloom
"embeddings.LayerNorm", # bert
+ "emb_ln", # nomic-bert
),
# Position embeddings
@@ -103,6 +104,7 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.query_key_value", # persimmon
"h.{bid}.attn.c_attn", # gpt2
"transformer.h.{bid}.mixer.Wqkv", # phi2
+ "encoder.layers.{bid}.attn.Wqkv", # nomic-bert
),
# Attention query
@@ -152,11 +154,13 @@ class TensorNameMap:
"transformer.h.{bid}.mixer.out_proj", # phi2
"model.layers.layers.{bid}.self_attn.o_proj", # plamo
"model.layers.{bid}.attention.wo", # internlm2
+ "encoder.layers.{bid}.attn.out_proj", # nomic-bert
),
# Attention output norm
MODEL_TENSOR.ATTN_OUT_NORM: (
"encoder.layer.{bid}.attention.output.LayerNorm", # bert
+ "encoder.layers.{bid}.norm1", # nomic-bert
),
# Rotary embeddings
@@ -205,6 +209,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.fc1", # phi2
"model.layers.layers.{bid}.mlp.up_proj", # plamo
"model.layers.{bid}.feed_forward.w3", # internlm2
+ "encoder.layers.{bid}.mlp.fc11", # nomic-bert
),
MODEL_TENSOR.FFN_UP_EXP: (
@@ -224,6 +229,7 @@ class TensorNameMap:
"transformer.h.{bid}.mlp.w2", # qwen
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
"model.layers.{bid}.feed_forward.w1", # internlm2
+ "encoder.layers.{bid}.mlp.fc12", # nomic-bert
),
MODEL_TENSOR.FFN_GATE_EXP: (
@@ -249,6 +255,7 @@ class TensorNameMap:
"model.layers.{bid}.mlp.fc2", # phi2
"model.layers.layers.{bid}.mlp.down_proj", # plamo
"model.layers.{bid}.feed_forward.w2", # internlm2
+ "encoder.layers.{bid}.mlp.fc2", # nomic-bert
),
MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -272,6 +279,7 @@ class TensorNameMap:
MODEL_TENSOR.LAYER_OUT_NORM: (
"encoder.layer.{bid}.output.LayerNorm", # bert
+ "encoder.layers.{bid}.norm2", # nomic-bert
)
}