samplers : Min-P sampler implementation [alternative to Top P/Top K] (#3841)

* Introduce the new Min-P sampler by @kalomaze. The Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token. * Min-P is enabled by default and set to 0.05. --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
author: kalomaze <66376113+kalomaze@users.noreply.github.com> 2023-10-31 14:44:49 -0500
committer: GitHub <noreply@github.com> 2023-10-31 20:44:49 +0100
commit: 238657db2364cfb728c694470a4a81702afea760 (patch)
tree: 8b870a0600d1a2de4d9efe7981c24164357f5552 /llama.cpp
parent: 07178c98e1b61a5e2af39d347add12e7eb9e08e1 (diff)
1 files changed, 26 insertions, 0 deletions
diff --git a/llama.cpp b/llama.cpp
index e599917a..7ee58929 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7368,6 +7368,32 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
     }
 }
 
+void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep) { // Min-P: shrink candidates->size to the leading run of tokens whose prob is >= p * (prob of the first token)
+    if (p <= 0.0f || !candidates->size) { // nothing to do for a non-positive threshold or an empty candidate list
+        return;
+    }
+    // NOTE(review): the code below assumes llama_sample_softmax leaves candidates->data ordered by descending .p - confirm against its definition
+    llama_sample_softmax(ctx, candidates);
+    // the timer starts after the softmax call, so only the truncation below is added to ctx->t_sample_us by this function
+    const int64_t t_start_sample_us = ggml_time_us();
+
+    float scale = candidates->data[0].p; // scale by max prob
+    size_t i = 1; // first token always matches
+    // advance i to the first index that is both below the cutoff (p * scale) and at or past min_keep; indices below min_keep are never cut
+    for (; i < candidates->size; ++i) {
+        if (candidates->data[i].p < p * scale && i >= min_keep) {
+            break; // prob too small
+        }
+    }
+
+    // Resize the output vector to keep only the matching tokens
+    candidates->size = i;
+
+    if (ctx) { // elapsed time is recorded only when a context is supplied
+        ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
+    }
+}
+
 void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep) {
     if (z >= 1.0f || candidates->size <= 2) {
         return;
author	kalomaze <66376113+kalomaze@users.noreply.github.com>	2023-10-31 14:44:49 -0500
committer	GitHub <noreply@github.com>	2023-10-31 20:44:49 +0100
commit	238657db2364cfb728c694470a4a81702afea760 (patch)
tree	8b870a0600d1a2de4d9efe7981c24164357f5552 /llama.cpp
parent	07178c98e1b61a5e2af39d347add12e7eb9e08e1 (diff)