summary | refs | log | tree | commit | diff
path: root/common/common.cpp
diff options
context:
space:
mode:
author: sasha0552 <admin@sasha0552.org> 2024-06-08 07:50:31 +0000
committer: GitHub <noreply@github.com> 2024-06-08 10:50:31 +0300
commit: 7a16ce7db2a74a223f0f3b9cee66d4539c5bce8f (patch)
tree: f1235f9d8ee68d4c39403bd2bca1078062cab2d7 /common/common.cpp
parent: da799b41891e34aac86ce4e173f9c4c0afd4fab3 (diff)
server : smart slot selection using Longest Common Prefix (#7728)
* server : Smart selection of available slot using Longest Common Substring
* add usage
* remove trailing whitespaces
* Use Longest Common Prefix (LCP) instead of LCS
* Rename argument
Diffstat (limited to 'common/common.cpp')
-rw-r--r-- common/common.cpp 10
1 files changed, 10 insertions, 0 deletions
diff --git a/common/common.cpp b/common/common.cpp
index cdcb352b..d2a8bb69 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1491,6 +1491,14 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
params.chat_template = argv[i];
return true;
}
+ if (arg == "--slot-prompt-similarity" || arg == "-sps") {
+ if (++i >= argc) {
+ invalid_param = true;
+ return true;
+ }
+ params.slot_prompt_similarity = std::stof(argv[i]);
+ return true;
+ }
if (arg == "-pps") {
params.is_pp_shared = true;
return true;
@@ -1913,6 +1921,8 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
"set custom jinja chat template (default: template taken from model's metadata)\n"
"only commonly used templates are accepted:\n"
"https://github.com/ggerganov/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template" });
+ options.push_back({ "server", "-sps, --slot-prompt-similarity SIMILARITY",
+ "how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity });
#ifndef LOG_DISABLE_LOGS
options.push_back({ "logging" });