summary | refs | log | tree | commit | diff
path: root/common/common.h
diff options
context:
space:
mode:
author: Douglas Hanley <thesecretaryofwar@gmail.com> 2024-03-03 04:40:27 -0600
committer: GitHub <noreply@github.com> 2024-03-03 12:40:27 +0200
commit: 475df1d6cf817060028d3ff763cb8097d4ec40d6 (patch)
tree: 5cad43f149f24b7b3f40604b78b7971e458aa309 /common/common.h
parent: 87c2e8b2797860a06af3d6c06b8488a8ff1a09ab (diff)
llama : allow for user specified embedding pooling type (#5849)
* allow for user specified pooling type
* llama : use enum types over int

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'common/common.h')
-rw-r--r-- common/common.h 7
1 files changed, 5 insertions, 2 deletions
diff --git a/common/common.h b/common/common.h
index ab62bdb8..d3682b7a 100644
--- a/common/common.h
+++ b/common/common.h
@@ -76,8 +76,11 @@ struct gpt_params {
float yarn_beta_slow = 1.0f; // YaRN high correction dim
int32_t yarn_orig_ctx = 0; // YaRN original context length
float defrag_thold = -1.0f; // KV cache defragmentation threshold
- int32_t rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
- ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
+
+ ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
+
+ llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
+ llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
// // sampling parameters
struct llama_sampling_params sparams;