summary | refs | log | tree | commit | diff
path: root/examples/server
diff options
context:
space:
mode:
author: Alexey Parfenov <zxed@alkatrazstudio.net> 2024-02-16 11:33:25 +0000
committer: GitHub <noreply@github.com> 2024-02-16 13:33:25 +0200
commit: 6dcc02d2444c779c18d49c364c5d5c5728b6b484 (patch)
tree: 938e984485a0146a61d4254911c308fe83a6c789 /examples/server
parent: 5f5808ca7b7f23a1fa7a77241842bb84a0e55108 (diff)
server : add "samplers" param to control the samplers order (#5494)
Diffstat (limited to 'examples/server')
-rw-r--r-- examples/server/README.md 2
-rw-r--r-- examples/server/server.cpp 25
2 files changed, 27 insertions, 0 deletions
diff --git a/examples/server/README.md b/examples/server/README.md
index 8e141d22..24936874 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -204,6 +204,8 @@ node index.js
`system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
+ `samplers`: The order the samplers should be applied in. An array of strings representing sampler type names. If a sampler is not set, it will not be used. If a sampler is specified more than once, it will be applied multiple times. (default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values)
+
### Result JSON
- Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 0cb802ce..a0b46970 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -672,6 +672,24 @@ struct llama_server_context
}
}
+ const auto &samplers_sequence = data.find("samplers");
+ if (samplers_sequence != data.end() && samplers_sequence->is_array())
+ {
+ std::vector<std::string> sampler_names;
+ for (const auto &sampler_name : *samplers_sequence)
+ {
+ if (sampler_name.is_string())
+ {
+ sampler_names.emplace_back(sampler_name);
+ }
+ }
+ slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+ }
+ else
+ {
+ slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
+ }
+
if (multimodal)
{
const auto &images_data = data.find("image_data");
@@ -1026,6 +1044,12 @@ struct llama_server_context
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
+ std::vector<std::string> samplers_sequence;
+ for (const auto &sampler_type : slot.sparams.samplers_sequence)
+ {
+ samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+ }
+
return json {
{"n_ctx", slot.n_ctx},
{"model", params.model_alias},
@@ -1056,6 +1080,7 @@ struct llama_server_context
{"logit_bias", slot.sparams.logit_bias},
{"n_probs", slot.sparams.n_probs},
{"grammar", slot.sparams.grammar},
+ {"samplers", samplers_sequence}
};
}