From 2891c8aa9af17f4ff636ff3868bc34ff72b56e25 Mon Sep 17 00:00:00 2001
From: Douglas Hanley <thesecretaryofwar@gmail.com>
Date: Sun, 11 Feb 2024 10:21:38 -0600
Subject: Add support for BERT embedding models (#5423)

* BERT model graph construction (build_bert)
* WordPiece tokenizer (llm_tokenize_wpm)
* Add flag for non-causal attention models
* Allow for models that only output embeddings
* Support conversion of BERT models to GGUF
* Based on prior work by @xyzhang626 and @skeskinen

---------

Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 llama.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'llama.h')

diff --git a/llama.h b/llama.h
index cec4158b..367e8f1a 100644
--- a/llama.h
+++ b/llama.h
@@ -61,6 +61,7 @@ extern "C" {
     enum llama_vocab_type {
         LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
         LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
+        LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
     };
 
     enum llama_token_type {
-- 
cgit v1.2.3