From 2891c8aa9af17f4ff636ff3868bc34ff72b56e25 Mon Sep 17 00:00:00 2001
From: Douglas Hanley
Date: Sun, 11 Feb 2024 10:21:38 -0600
Subject: Add support for BERT embedding models (#5423)

* BERT model graph construction (build_bert)
* WordPiece tokenizer (llm_tokenize_wpm)
* Add flag for non-causal attention models
* Allow for models that only output embeddings
* Support conversion of BERT models to GGUF
* Based on prior work by @xyzhang626 and @skeskinen

---------

Co-authored-by: Jared Van Bortel
Co-authored-by: Jared Van Bortel
Co-authored-by: Georgi Gerganov
---
 examples/embedding/embedding.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'examples/embedding/embedding.cpp')

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 3295cd24..27376c8f 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -87,7 +87,17 @@ int main(int argc, char ** argv) {
     }
 
     const int n_embd = llama_n_embd(model);
-    const auto * embeddings = llama_get_embeddings(ctx);
+    auto * embeddings = llama_get_embeddings(ctx);
+
+    // l2-normalize embeddings
+    float norm = 0;
+    for (int i = 0; i < n_embd; i++) {
+        norm += embeddings[i] * embeddings[i];
+    }
+    norm = sqrt(norm);
+    for (int i = 0; i < n_embd; i++) {
+        embeddings[i] /= norm;
+    }
 
     for (int i = 0; i < n_embd; i++) {
         printf("%f ", embeddings[i]);
--
cgit v1.2.3
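
Side note, not part of the patch: the point of the l2-normalization added above is that the dot product of two unit-length embeddings equals their cosine similarity, so downstream similarity comparisons need no further scaling. A minimal standalone sketch in standard C++ (the vectors a and b below are hypothetical toy embeddings, assumed already normalized as in the hunk above):

    #include <cstdio>
    #include <vector>

    // dot product of two L2-normalized embeddings == their cosine similarity
    static float dot(const std::vector<float> & a, const std::vector<float> & b) {
        float sum = 0.0f;
        for (size_t i = 0; i < a.size() && i < b.size(); i++) {
            sum += a[i] * b[i];
        }
        return sum;
    }

    int main() {
        // toy 3-dimensional unit-length "embeddings"
        std::vector<float> a = {1.0f, 0.0f, 0.0f};
        std::vector<float> b = {0.6f, 0.8f, 0.0f};
        printf("cosine similarity: %f\n", dot(a, b)); // prints 0.600000
        return 0;
    }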