summary | refs | log | tree | commit | diff
path: root/llama.h
diff options
context:
space:
mode:
author: Douglas Hanley <thesecretaryofwar@gmail.com> 2024-02-11 10:21:38 -0600
committer: GitHub <noreply@github.com> 2024-02-11 11:21:38 -0500
commit: 2891c8aa9af17f4ff636ff3868bc34ff72b56e25 (patch)
tree: 1a037e8ad635aa54ddf8ab8cb39c04bb4f8cf141 /llama.h
parent: 97a336507ed9b971d72262bec7e2b8b7016a054a (diff)
Add support for BERT embedding models (#5423)
* BERT model graph construction (build_bert)
* WordPiece tokenizer (llm_tokenize_wpm)
* Add flag for non-causal attention models
* Allow for models that only output embeddings
* Support conversion of BERT models to GGUF
* Based on prior work by @xyzhang626 and @skeskinen

---------

Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h 1
1 files changed, 1 insertions, 0 deletions
diff --git a/llama.h b/llama.h
index cec4158b..367e8f1a 100644
--- a/llama.h
+++ b/llama.h
@@ -61,6 +61,7 @@ extern "C" {
enum llama_vocab_type {
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
+ LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
};
enum llama_token_type {