From 2891c8aa9af17f4ff636ff3868bc34ff72b56e25 Mon Sep 17 00:00:00 2001 From: Douglas Hanley Date: Sun, 11 Feb 2024 10:21:38 -0600 Subject: Add support for BERT embedding models (#5423) * BERT model graph construction (build_bert) * WordPiece tokenizer (llm_tokenize_wpm) * Add flag for non-causal attention models * Allow for models that only output embeddings * Support conversion of BERT models to GGUF * Based on prior work by @xyzhang626 and @skeskinen --------- Co-authored-by: Jared Van Bortel Co-authored-by: Jared Van Bortel Co-authored-by: Georgi Gerganov --- llama.h | 1 + 1 file changed, 1 insertion(+) (limited to 'llama.h') diff --git a/llama.h b/llama.h index cec4158b..367e8f1a 100644 --- a/llama.h +++ b/llama.h @@ -61,6 +61,7 @@ extern "C" { enum llama_vocab_type { LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding + LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece }; enum llama_token_type { -- cgit v1.2.3