diff options
author | kalomaze <66376113+kalomaze@users.noreply.github.com> | 2023-10-31 14:44:49 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-31 20:44:49 +0100 |
commit | 238657db2364cfb728c694470a4a81702afea760 (patch) | |
tree | 8b870a0600d1a2de4d9efe7981c24164357f5552 /common/sampling.cpp | |
parent | 07178c98e1b61a5e2af39d347add12e7eb9e08e1 (diff) |
samplers : Min-P sampler implementation [alternative to Top P/Top K] (#3841)
* Introduce the new Min-P sampler by @kalomaze
The Min-P sampling method was designed as an alternative to Top-P, and aims to ensure a balance of quality and variety. The parameter *p* represents the minimum probability for a token to be considered, relative to the probability of the most likely token.
* Min-P enabled and set to 0.05 default
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: cebtenzzre <cebtenzzre@gmail.com>
Diffstat (limited to 'common/sampling.cpp')
-rw-r--r-- | common/sampling.cpp | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/common/sampling.cpp b/common/sampling.cpp index c4996c98..673d67a6 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -89,10 +89,10 @@ std::string llama_sampling_print(const llama_sampling_params & params) { snprintf(result, sizeof(result), "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n" - "\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, typical_p = %.3f, temp = %.3f\n" + "\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, typical_p = %.3f, temp = %.3f\n" "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f", params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present, - params.top_k, params.tfs_z, params.top_p, params.typical_p, params.temp, + params.top_k, params.tfs_z, params.top_p, params.min_p, params.typical_p, params.temp, params.mirostat, params.mirostat_eta, params.mirostat_tau); return std::string(result); @@ -110,6 +110,7 @@ llama_token llama_sampling_sample( const float temp = params.temp; const int32_t top_k = params.top_k <= 0 ? n_vocab : params.top_k; const float top_p = params.top_p; + const float min_p = params.min_p; const float tfs_z = params.tfs_z; const float typical_p = params.typical_p; const int32_t penalty_last_n = params.penalty_last_n < 0 ? params.n_prev : params.penalty_last_n; @@ -190,6 +191,7 @@ llama_token llama_sampling_sample( llama_sample_tail_free(ctx_main, &cur_p, tfs_z, min_keep); llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); + llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); llama_sample_temp (ctx_main, &cur_p, temp); id = llama_sample_token(ctx_main, &cur_p); |