summaryrefslogtreecommitdiff
path: root/gguf-py
diff options
context:
space:
mode:
authorJoan Fontanals <jfontanalsmartinez@gmail.com>2024-05-11 09:46:09 +0200
committerGitHub <noreply@github.com>2024-05-11 10:46:09 +0300
commitb83cc3f5b303ff30c52874b2d5864dc6385ebf9f (patch)
tree912fe3b8df9eb7b39ae7f4321f080b924628336b /gguf-py
parent9cb317f77e53067f7a138cc89ef7657148eae8e6 (diff)
llama : add Jina Embeddings architecture (#6826)
* feat: first things to do * feat: create tensors for Jina architecture * fix: use other tensors * feat: embedding gets results * fix: fix usage of ALIBI * fix: clean prints * fix: do some cleanup unused vars * fix: revert changes to Makefile and CMakeLists * fix: revert some changes * fix: fix small detail * fix: fix convert formatting * fix: fix linting and editor * feat: set proper vocab settings * fix: JinaBertForMaskedLM registration * feat: support q_normalization and k_normalization in Jina arch * feat: handle gpt2 tokenizer with Jina architecture * feat: example comments in embedding * feat: rename Jina Bert to Jina Bert V2 * fix: add some changes as per review * feat: proper KQ_pos for Jina embeddings * feat: add capacity to load models ES and DE for Spanish * llama : fix pre-tokenizers * ggml : full ALiBi support * ggml : update ggml_soft_max_ext() CUDA, SYCL * ggml : ggml_flash_attn_ext() support ALiBi (CPU) * ggml : ggml_flash_attn_ext() support ALiBi (Metal) * ggml : fix warning * ggml : ggml_flash_attn_ext() support ALiBi (CUDA) ggml-ci * minor : clean-up * embedding : add warning about missing SEP --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'gguf-py')
-rw-r--r--gguf-py/gguf/constants.py18
-rw-r--r--gguf-py/gguf/tensor_mapping.py6
2 files changed, 24 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 5951c0bb..a4fbfc5e 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -118,6 +118,7 @@ class MODEL_ARCH(IntEnum):
REFACT = auto()
BERT = auto()
NOMIC_BERT = auto()
+ JINA_BERT_V2 = auto()
BLOOM = auto()
STABLELM = auto()
QWEN = auto()
@@ -195,6 +196,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.REFACT: "refact",
MODEL_ARCH.BERT: "bert",
MODEL_ARCH.NOMIC_BERT: "nomic-bert",
+ MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2",
MODEL_ARCH.BLOOM: "bloom",
MODEL_ARCH.STABLELM: "stablelm",
MODEL_ARCH.QWEN: "qwen",
@@ -380,6 +382,22 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_UP,
MODEL_TENSOR.LAYER_OUT_NORM,
],
+ MODEL_ARCH.JINA_BERT_V2: [
+ MODEL_TENSOR.TOKEN_EMBD,
+ MODEL_TENSOR.TOKEN_EMBD_NORM,
+ MODEL_TENSOR.TOKEN_TYPES,
+ MODEL_TENSOR.ATTN_OUT_NORM,
+ MODEL_TENSOR.ATTN_Q,
+ MODEL_TENSOR.ATTN_Q_NORM,
+ MODEL_TENSOR.ATTN_K,
+ MODEL_TENSOR.ATTN_K_NORM,
+ MODEL_TENSOR.ATTN_V,
+ MODEL_TENSOR.ATTN_OUT,
+ MODEL_TENSOR.FFN_UP,
+ MODEL_TENSOR.FFN_GATE,
+ MODEL_TENSOR.FFN_DOWN,
+ MODEL_TENSOR.LAYER_OUT_NORM,
+ ],
MODEL_ARCH.MPT: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index 990fe63c..8e1cac91 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -243,6 +243,7 @@ class TensorNameMap:
"model.layers.{bid}.feed_forward.w3", # internlm2
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
"model.layers.{bid}.mlp.c_fc", # starcoder2
+ "encoder.layer.{bid}.mlp.gated_layers_v", # jina-bert-v2
),
MODEL_TENSOR.FFN_UP_EXP: (
@@ -269,6 +270,7 @@ class TensorNameMap:
"model.layers.layers.{bid}.mlp.gate_proj", # plamo
"model.layers.{bid}.feed_forward.w1", # internlm2
"encoder.layers.{bid}.mlp.fc12", # nomic-bert
+ "encoder.layer.{bid}.mlp.gated_layers_w", # jina-bert-v2
"transformer.h.{bid}.mlp.linear_1", # refact
),
@@ -303,6 +305,7 @@ class TensorNameMap:
"model.layers.{bid}.feed_forward.w2", # internlm2
"encoder.layers.{bid}.mlp.fc2", # nomic-bert
"model.layers.{bid}.mlp.c_proj", # starcoder2
+ "encoder.layer.{bid}.mlp.wo", # jina-bert-v2
),
MODEL_TENSOR.FFN_DOWN_EXP: (
@@ -321,6 +324,7 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.q_layernorm", # persimmon
"model.layers.{bid}.self_attn.q_norm", # cohere
"transformer.blocks.{bid}.attn.q_ln", # sea-lion
+ "encoder.layer.{bid}.attention.self.layer_norm_q" # jina-bert-v2
),
MODEL_TENSOR.ATTN_K_NORM: (
@@ -328,6 +332,7 @@ class TensorNameMap:
"model.layers.{bid}.self_attn.k_layernorm", # persimmon
"model.layers.{bid}.self_attn.k_norm", # cohere
"transformer.blocks.{bid}.attn.k_ln", # sea-lion
+ "encoder.layer.{bid}.attention.self.layer_norm_k" # jina-bert-v2
),
MODEL_TENSOR.ROPE_FREQS: (
@@ -338,6 +343,7 @@ class TensorNameMap:
"encoder.layer.{bid}.output.LayerNorm", # bert
"encoder.layers.{bid}.norm2", # nomic-bert
"transformer.decoder_layer.{bid}.rms_norm_3", # Grok
+ "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2
),
MODEL_TENSOR.SSM_IN: (