summaryrefslogtreecommitdiff
path: root/examples/embedding
diff options
context:
space:
mode:
authorDouglas Hanley <thesecretaryofwar@gmail.com>2024-02-11 10:21:38 -0600
committerGitHub <noreply@github.com>2024-02-11 11:21:38 -0500
commit2891c8aa9af17f4ff636ff3868bc34ff72b56e25 (patch)
tree1a037e8ad635aa54ddf8ab8cb39c04bb4f8cf141 /examples/embedding
parent97a336507ed9b971d72262bec7e2b8b7016a054a (diff)
Add support for BERT embedding models (#5423)
* BERT model graph construction (build_bert)
* WordPiece tokenizer (llm_tokenize_wpm)
* Add flag for non-causal attention models
* Allow for models that only output embeddings
* Support conversion of BERT models to GGUF
* Based on prior work by @xyzhang626 and @skeskinen
---------
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples/embedding')
-rw-r--r--examples/embedding/embedding.cpp12
1 file changed, 11 insertions(+), 1 deletion(-)
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 3295cd24..27376c8f 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -87,7 +87,17 @@ int main(int argc, char ** argv) {
}
const int n_embd = llama_n_embd(model);
- const auto * embeddings = llama_get_embeddings(ctx);
+ auto * embeddings = llama_get_embeddings(ctx);
+
+ // l2-normalize embeddings
+ float norm = 0;
+ for (int i = 0; i < n_embd; i++) {
+ norm += embeddings[i] * embeddings[i];
+ }
+ norm = sqrt(norm);
+ for (int i = 0; i < n_embd; i++) {
+ embeddings[i] /= norm;
+ }
for (int i = 0; i < n_embd; i++) {
printf("%f ", embeddings[i]);