From 475df1d6cf817060028d3ff763cb8097d4ec40d6 Mon Sep 17 00:00:00 2001
From: Douglas Hanley <thesecretaryofwar@gmail.com>
Date: Sun, 3 Mar 2024 04:40:27 -0600
Subject: llama : allow for user specified embedding pooling type (#5849)

* allow for user specified pooling type

* llama : use enum types over int

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 common/common.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'common/common.h')

diff --git a/common/common.h b/common/common.h
index ab62bdb8..d3682b7a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -76,8 +76,11 @@ struct gpt_params {
     float   yarn_beta_slow        = 1.0f;  // YaRN high correction dim
     int32_t yarn_orig_ctx         = 0;     // YaRN original context length
     float   defrag_thold          = -1.0f; // KV cache defragmentation threshold
-    int32_t rope_scaling_type     = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
-    ggml_numa_strategy numa       = GGML_NUMA_STRATEGY_DISABLED;
+
+    ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
+
+    llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+    llama_pooling_type      pooling_type      = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
 
     // // sampling parameters
     struct llama_sampling_params sparams;
-- 
cgit v1.2.3