summary | refs | log | tree | commit | diff
path: root/llama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp')
-rw-r--r-- llama.cpp 26
1 files changed, 26 insertions, 0 deletions
diff --git a/llama.cpp b/llama.cpp
index e599917a..7ee58929 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -7368,6 +7368,32 @@ void llama_sample_top_p(struct llama_context * ctx, llama_token_data_array * can
}
}
+// Min-p filter: after running softmax over the candidates, keeps the leading
+// entries whose probability is at least `p` times the probability of the first
+// entry (treated as the maximum), and drops everything from the first entry
+// that falls below that cutoff onward.
+//
+//   ctx        - optional; when non-null, the time spent here is added to
+//                ctx->t_sample_us
+//   candidates - truncated in place by shrinking candidates->size
+//   p          - relative cutoff; values <= 0 disable the filter entirely
+//   min_keep   - entries at indices below min_keep are never cut
+//
+// At least one candidate always survives.
+void llama_sample_min_p(struct llama_context * ctx, llama_token_data_array * candidates, float p, size_t min_keep) {
+    if (p <= 0.0f || candidates->size == 0) {
+        return;
+    }
+
+    llama_sample_softmax(ctx, candidates);
+
+    const int64_t t_start_sample_us = ggml_time_us();
+
+    // Cutoff is relative to the first candidate's probability (the max prob).
+    const float threshold = p * candidates->data[0].p;
+
+    // Index 0 always passes, so counting starts at one kept entry.
+    size_t n_keep = 1;
+    while (n_keep < candidates->size) {
+        // Stop at the first entry under the cutoff once min_keep is satisfied.
+        if (n_keep >= min_keep && candidates->data[n_keep].p < threshold) {
+            break;
+        }
+        ++n_keep;
+    }
+
+    // Shrink the array so only the surviving prefix remains visible.
+    candidates->size = n_keep;
+
+    if (ctx) {
+        ctx->t_sample_us += ggml_time_us() - t_start_sample_us;
+    }
+}
+
void llama_sample_tail_free(struct llama_context * ctx, llama_token_data_array * candidates, float z, size_t min_keep) {
if (z >= 1.0f || candidates->size <= 2) {
return;