summaryrefslogtreecommitdiff
path: root/convert-hf-to-gguf.py
diff options
context:
space:
mode:
author: Joan Fontanals <jfontanalsmartinez@gmail.com>  2024-05-11 09:46:09 +0200
committer: GitHub <noreply@github.com>  2024-05-11 10:46:09 +0300
commit: b83cc3f5b303ff30c52874b2d5864dc6385ebf9f (patch)
tree: 912fe3b8df9eb7b39ae7f4321f080b924628336b /convert-hf-to-gguf.py
parent: 9cb317f77e53067f7a138cc89ef7657148eae8e6 (diff)
llama : add Jina Embeddings architecture (#6826)
* feat: first things to do
* feat: create tensors for Jina architecture
* fix: use other tensors
* feat: embedding gets results
* fix: fix usage of ALIBI
* fix: clean prints
* fix: do some cleanup unused vars
* fix: revert changes to Makefile and CMakeLists
* fix: revert some changes
* fix: fix small detail
* fix: fix convert formatting
* fix: fix linting and editor
* feat: set proper vocab settings
* fix: JinaBertForMaskedLM registration
* feat: support q_normalization and k_normalization in Jina arch
* feat: handle gpt2 tokenizer with Jina architecture
* feat: example comments in embedding
* feat: rename Jina Bert to Jina Bert V2
* fix: add some changes as per review
* feat: proper KQ_pos for Jina embeddings
* feat: add capacity to load models ES and DE for Spanish
* llama : fix pre-tokenizers
* ggml : full ALiBi support
* ggml : update ggml_soft_max_ext() CUDA, SYCL
* ggml : ggml_flash_attn_ext() support ALiBi (CPU)
* ggml : ggml_flash_attn_ext() support ALiBi (Metal)
* ggml : fix warning
* ggml : ggml_flash_attn_ext() support ALiBi (CUDA) ggml-ci
* minor : clean-up
* embedding : add warning about missing SEP

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'convert-hf-to-gguf.py')
-rwxr-xr-x  convert-hf-to-gguf.py  48
1 file changed, 47 insertions, 1 deletion
diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 3315ca74..fbaed64d 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -404,8 +404,17 @@ class Model:
# ref: https://huggingface.co/allenai/OLMo-1.7-7B-hf
res = "olmo"
if chkhsh == "a8594e3edff7c29c003940395316294b2c623e09894deebbc65f33f1515df79e":
- # ref: https://huggingface.co/databricks/dbrx-instruct
+ # ref: https://huggingface.co/databricks/dbrx-base
res = "dbrx"
+ if chkhsh == "0876d13b50744004aa9aeae05e7b0647eac9d801b5ba4668afc01e709c15e19f":
+ # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-en
+ res = "jina-en"
+ if chkhsh == "171aeeedd6fb548d418a7461d053f11b6f1f1fc9b387bd66640d28a4b9f5c643":
+ # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-es
+ res = "jina-es"
+ if chkhsh == "27949a2493fc4a9f53f5b9b029c82689cfbe5d3a1929bb25e043089e28466de6":
+ # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-de
+ res = "jina-de"
if res is None:
logger.warning("\n")
@@ -2289,6 +2298,43 @@ class OlmoModel(Model):
return [(self.map_tensor_name(name), data_torch)]
+@Model.register("JinaBertModel", "JinaBertForMaskedLM")
+class JinaBertV2Model(BertModel):
+    """Convert Jina Embeddings v2 (JinaBERT) checkpoints to GGUF.
+
+    Extends BertModel with two Jina-specific adjustments: the fused
+    GLU weight tensor ``gated_layers`` is split into separate
+    ``gated_layers_w`` / ``gated_layers_v`` tensors, and the vocab
+    loader is chosen from the ``tokenizer_class`` recorded in the
+    model's ``tokenizer_config.json``.
+    """
+    model_arch = gguf.MODEL_ARCH.JINA_BERT_V2
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Feed-forward width, cached for get_tensors(): the fused
+        # gated-layer weight stacks the two halves along dim 0, each
+        # of size intermediate_size.
+        self.intermediate_size = self.hparams["intermediate_size"]
+
+    def get_tensors(self):
+        # Yield (name, tensor) pairs from the base class, splitting any
+        # fused 'gated_layers' weight into its two GLU halves.
+        for name, data in super().get_tensors():
+            if 'gated_layers' in name:
+                # Rows [0, intermediate_size) -> '..._w' tensor,
+                # rows [intermediate_size, end) -> '..._v' tensor.
+                d1 = data[:self.intermediate_size, :]
+                name1 = name.replace('gated_layers', 'gated_layers_w')
+                d2 = data[self.intermediate_size:, :]
+                name2 = name.replace('gated_layers', 'gated_layers_v')
+                yield name1, d1
+                yield name2, d2
+                continue
+
+            yield name, data
+
+    def set_vocab(self, *args, **kwargs):
+        # Dispatch on the tokenizer class declared by the checkpoint.
+        # Default assumes WordPiece if the key read below were absent.
+        tokenizer_class = 'BertTokenizer'
+        with open(self.dir_model / "tokenizer_config.json", "r", encoding="utf-8") as f:
+            tokenizer_class = json.load(f)['tokenizer_class']
+
+        if tokenizer_class == 'BertTokenizer':
+            # WordPiece vocab: reuse the stock BERT path.
+            super().set_vocab()
+        elif tokenizer_class == 'RobertaTokenizer':
+            # GPT-2 style BPE vocab (presumably the es/de checkpoints —
+            # see the jina-es/jina-de vocab hashes in the hunk above);
+            # BERT-style models still carry two token types (segments A/B).
+            self._set_vocab_gpt2()
+            self.gguf_writer.add_token_type_count(2)
+        else:
+            raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel')
+        # Embedding use expects BOS/EOS ([CLS]/[SEP]) wrapped around inputs.
+        self.gguf_writer.add_add_bos_token(True)
+        self.gguf_writer.add_add_eos_token(True)
+
+
###### CONVERSION LOGIC ######