summaryrefslogtreecommitdiff
path: root/llama.h
diff options
context:
space:
mode:
authorDouglas Hanley <thesecretaryofwar@gmail.com>2024-02-15 11:21:49 -0600
committerGitHub <noreply@github.com>2024-02-15 12:21:49 -0500
commit4524290e87b8e107cc2b56e1251751546f4b9051 (patch)
tree38d50aa2850bc2ecb53619fb9f03e0f91953a4c6 /llama.h
parentc06e45d72983d9ace7b1535f7e7ea258d212169e (diff)
Use correct type of pooling for embedding models (#5500)
Use correct type of pooling for embedding models
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h | 6
1 file changed, 6 insertions, 0 deletions
diff --git a/llama.h b/llama.h
index 5ef78ec9..4a26bd61 100644
--- a/llama.h
+++ b/llama.h
@@ -112,6 +112,12 @@ extern "C" {
LLAMA_ROPE_SCALING_MAX_VALUE = LLAMA_ROPE_SCALING_YARN,
};
+ enum llama_pooling_type {
+ LLAMA_POOLING_NONE = 0,
+ LLAMA_POOLING_MEAN = 1,
+ LLAMA_POOLING_CLS = 2,
+ };
+
enum llama_split_mode {
LLAMA_SPLIT_NONE = 0, // single GPU
LLAMA_SPLIT_LAYER = 1, // split layers and KV across GPUs