summaryrefslogtreecommitdiff
path: root/llama.h
diff options
context:
space:
mode:
authorDouglas Hanley <thesecretaryofwar@gmail.com>2024-02-15 11:21:49 -0600
committerGitHub <noreply@github.com>2024-02-15 12:21:49 -0500
commit4524290e87b8e107cc2b56e1251751546f4b9051 (patch)
tree38d50aa2850bc2ecb53619fb9f03e0f91953a4c6 /llama.h
parentc06e45d72983d9ace7b1535f7e7ea258d212169e (diff)
Use correct type of pooling for embedding models (#5500)
Use correct type of pooling for embedding models
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h | 6
1 file changed, 6 insertions, 0 deletions
diff --git a/llama.h b/llama.h
index 5ef78ec9..4a26bd61 100644
--- a/llama.h
+++ b/llama.h
@@ -112,6 +112,12 @@ extern "C" {
LLAMA_ROPE_SCALING_MAX_VALUE = LLAMA_ROPE_SCALING_YARN,
};
+ enum llama_pooling_type {
+ LLAMA_POOLING_NONE = 0,
+ LLAMA_POOLING_MEAN = 1,
+ LLAMA_POOLING_CLS = 2,
+ };
+
enum llama_split_mode {
LLAMA_SPLIT_NONE = 0, // single GPU
LLAMA_SPLIT_LAYER = 1, // split layers and KV across GPUs