Add support for BERT embedding models (#5423)

* BERT model graph construction (build_bert)
* WordPiece tokenizer (llm_tokenize_wpm)
* Add flag for non-causal attention models
* Allow for models that only output embeddings
* Support conversion of BERT models to GGUF
* Based on prior work by @xyzhang626 and @skeskinen

---------

Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: Douglas Hanley <thesecretaryofwar@gmail.com> 2024-02-11 10:21:38 -0600
committer: GitHub <noreply@github.com> 2024-02-11 11:21:38 -0500
commit: 2891c8aa9af17f4ff636ff3868bc34ff72b56e25 (patch)
tree: 1a037e8ad635aa54ddf8ab8cb39c04bb4f8cf141 /gguf-py/gguf/gguf_writer.py
parent: 97a336507ed9b971d72262bec7e2b8b7016a054a (diff)
1 file changed, 6 insertions, 0 deletions
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 16808196..7af58a46 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -357,6 +357,9 @@ class GGUFWriter:
     def add_layer_norm_rms_eps(self, value: float) -> None:
         self.add_float32(Keys.Attention.LAYERNORM_RMS_EPS.format(arch=self.arch), value)
 
+    def add_causal_attention(self, value: bool) -> None:
+        self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
+
     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
 
@@ -387,6 +390,9 @@ class GGUFWriter:
     def add_token_types(self, types: Sequence[TokenType] | Sequence[int]) -> None:
         self.add_array(Keys.Tokenizer.TOKEN_TYPE, types)
 
+    def add_token_type_count(self, value: int) -> None:
+        self.add_uint32(Keys.Tokenizer.TOKEN_TYPE_COUNT, value)
+
     def add_token_scores(self, scores: Sequence[float]) -> None:
         self.add_array(Keys.Tokenizer.SCORES, scores)
author	Douglas Hanley <thesecretaryofwar@gmail.com>	2024-02-11 10:21:38 -0600
committer	GitHub <noreply@github.com>	2024-02-11 11:21:38 -0500
commit	2891c8aa9af17f4ff636ff3868bc34ff72b56e25 (patch)
tree	1a037e8ad635aa54ddf8ab8cb39c04bb4f8cf141 /gguf-py/gguf/gguf_writer.py
parent	97a336507ed9b971d72262bec7e2b8b7016a054a (diff)