diff options
author | Douglas Hanley <thesecretaryofwar@gmail.com> | 2024-06-21 00:38:22 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-21 08:38:22 +0300 |
commit | 80ea089d771f0c2d97afa8bead80ded412f600d7 (patch) | |
tree | 25c04a967b5913ffdc00d1a851dcfbeb9ab37a37 /common/common.cpp | |
parent | 0e64591e8290037db6412665a56354b789a0597e (diff) |
llama : allow pooled embeddings on any model (#7477)
* create append_pooling operation; allow to specify attention_type; add last token pooling; update examples
* find result_norm/result_embd tensors properly; update output allocation logic
* only use embd output for pooling_type NONE
* get rid of old causal_attn accessor
* take out attention_type; add in llama_set_embeddings
* bypass logits when doing non-NONE pooling
Diffstat (limited to 'common/common.cpp')
-rw-r--r-- | common/common.cpp | 2 |
1 file changed, 2 insertions, 0 deletions
```diff
diff --git a/common/common.cpp b/common/common.cpp
index 9c23d001..64f160af 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -541,6 +541,7 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
     /**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; }
     else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; }
     else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
+    else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; }
     else { invalid_param = true; }
     return true;
 }
@@ -1869,6 +1870,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
 options.push_back({ "backend" });
 options.push_back({ "*", " --rpc SERVERS", "comma separated list of RPC servers" });
+
 if (llama_supports_mlock()) {
     options.push_back({ "*", " --mlock", "force system to keep model in RAM rather than swapping or compressing" });
 }
```