server : add "samplers" param to control the samplers order (#5494)

author: Alexey Parfenov <zxed@alkatrazstudio.net> 2024-02-16 11:33:25 +0000
committer: GitHub <noreply@github.com> 2024-02-16 13:33:25 +0200
commit: 6dcc02d2444c779c18d49c364c5d5c5728b6b484 (patch)
tree: 938e984485a0146a61d4254911c308fe83a6c789 /examples/server
parent: 5f5808ca7b7f23a1fa7a77241842bb84a0e55108 (diff)
2 files changed, 27 insertions, 0 deletions
diff --git a/examples/server/README.md b/examples/server/README.md
index 8e141d22..24936874 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -204,6 +204,8 @@ node index.js
 
     `system_prompt`: Change the system prompt (initial prompt of all slots), this is useful for chat applications. [See more](#change-system-prompt-on-runtime)
 
+    `samplers`: The order in which the samplers are applied, given as an array of strings naming sampler types. A sampler that is not listed in the array is not used; a sampler listed more than once is applied once per occurrence. (default: `["top_k", "tfs_z", "typical_p", "top_p", "min_p", "temperature"]` - these are all the available values)
+
 ### Result JSON
 
 - Note: When using streaming mode (`stream`) only `content` and `stop` will be returned until end of completion.
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 0cb802ce..a0b46970 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -672,6 +672,24 @@ struct llama_server_context
             }
         }
 
+        const auto &samplers_sequence = data.find("samplers");
+        if (samplers_sequence != data.end() && samplers_sequence->is_array())
+        {
+            std::vector<std::string> sampler_names;
+            for (const auto &sampler_name : *samplers_sequence)
+            {
+                if (sampler_name.is_string())
+                {
+                    sampler_names.emplace_back(sampler_name);
+                }
+            }
+            slot->sparams.samplers_sequence = sampler_types_from_names(sampler_names, false);
+        }
+        else
+        {
+            slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
+        }
+
         if (multimodal)
         {
             const auto &images_data = data.find("image_data");
@@ -1026,6 +1044,12 @@ struct llama_server_context
         const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
         const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
                                 eos_bias->second < 0.0f && std::isinf(eos_bias->second);
+        std::vector<std::string> samplers_sequence;
+        for (const auto &sampler_type : slot.sparams.samplers_sequence)
+        {
+            samplers_sequence.emplace_back(sampler_type_to_name_string(sampler_type));
+        }
+
         return json {
             {"n_ctx",             slot.n_ctx},
             {"model",             params.model_alias},
@@ -1056,6 +1080,7 @@ struct llama_server_context
             {"logit_bias",        slot.sparams.logit_bias},
             {"n_probs",           slot.sparams.n_probs},
             {"grammar",           slot.sparams.grammar},
+            {"samplers",          samplers_sequence}
         };
     }
author	Alexey Parfenov <zxed@alkatrazstudio.net>	2024-02-16 11:33:25 +0000
committer	GitHub <noreply@github.com>	2024-02-16 13:33:25 +0200
commit	6dcc02d2444c779c18d49c364c5d5c5728b6b484 (patch)
tree	938e984485a0146a61d4254911c308fe83a6c789 /examples/server
parent	5f5808ca7b7f23a1fa7a77241842bb84a0e55108 (diff)