From 2891c8aa9af17f4ff636ff3868bc34ff72b56e25 Mon Sep 17 00:00:00 2001
From: Douglas Hanley
Date: Sun, 11 Feb 2024 10:21:38 -0600
Subject: Add support for BERT embedding models (#5423)

* BERT model graph construction (build_bert)
* WordPiece tokenizer (llm_tokenize_wpm)
* Add flag for non-causal attention models
* Allow for models that only output embeddings
* Support conversion of BERT models to GGUF
* Based on prior work by @xyzhang626 and @skeskinen

---------

Co-authored-by: Jared Van Bortel
Co-authored-by: Jared Van Bortel
Co-authored-by: Georgi Gerganov
---
 examples/embedding/embedding.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'examples/embedding/embedding.cpp')

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 3295cd24..27376c8f 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -87,7 +87,17 @@ int main(int argc, char ** argv) {
     }
 
     const int n_embd = llama_n_embd(model);
-    const auto * embeddings = llama_get_embeddings(ctx);
+    auto * embeddings = llama_get_embeddings(ctx);
+
+    // l2-normalize embeddings
+    float norm = 0;
+    for (int i = 0; i < n_embd; i++) {
+        norm += embeddings[i] * embeddings[i];
+    }
+    norm = sqrt(norm);
+    for (int i = 0; i < n_embd; i++) {
+        embeddings[i] /= norm;
+    }
 
     for (int i = 0; i < n_embd; i++) {
         printf("%f ", embeddings[i]);
--
cgit v1.2.3
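
Side note, not part of the patch: the point of the l2-normalization added above is that the dot product of two unit-length embeddings equals their cosine similarity, so downstream similarity comparisons need no further scaling. A minimal standalone sketch in standard C++ (the vectors a and b below are hypothetical toy embeddings, assumed already normalized as in the hunk above):

    #include <cstdio>
    #include <vector>

    // dot product of two L2-normalized embeddings == their cosine similarity
    static float dot(const std::vector<float> & a, const std::vector<float> & b) {
        float sum = 0.0f;
        for (size_t i = 0; i < a.size() && i < b.size(); i++) {
            sum += a[i] * b[i];
        }
        return sum;
    }

    int main() {
        // toy 3-dimensional unit-length "embeddings"
        std::vector<float> a = {1.0f, 0.0f, 0.0f};
        std::vector<float> b = {0.6f, 0.8f, 0.0f};
        printf("cosine similarity: %f\n", dot(a, b)); // prints 0.600000
        return 0;
    }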