diff options
author | l3utterfly <gc.pthzfoldr@gmail.com> | 2024-01-26 05:06:22 +0900 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-25 22:06:22 +0200 |
commit | 5eaf9964fc797d4585c214db32a463d557f3ed33 (patch) | |
tree | e41be4957a366e1210769298156de9f09b9593c8 /common/sampling.cpp | |
parent | d292f4f2047963f558dd516f1baaa71793e9acf2 (diff) |
llama : dynamic temperature sampling (#4972)
* implemented dynamic temperature sampling from koboldcpp
* removed trailing whitespace
* removed unused temp parameter in llama_sample_entropy
* exposed exponent_val in dynamic temp sampler
* added debug check for printf statements
* use nullptr in llama_sample_softmax call during llama_sample_entropy
this avoids counting the time taken stats twice
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* return earlier if there is only 1 candidate (i.e. max_entropy == 0)
* reformat 't' case in sampler_queue
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
* check for one or zero candidates case in llama_sample_entropy
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
Diffstat (limited to 'common/sampling.cpp')
-rw-r--r-- | common/sampling.cpp | 12 |
1 files changed, 11 insertions, 1 deletions
diff --git a/common/sampling.cpp b/common/sampling.cpp index dd1ffeb1..efd7eab6 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -129,6 +129,8 @@ static void sampler_queue( const int n_vocab = llama_n_vocab(llama_get_model(ctx_main)); const float temp = params.temp; + const float dynatemp_range = params.dynatemp_range; + const float dynatemp_exponent = params.dynatemp_exponent; const int32_t top_k = params.top_k <= 0 ? n_vocab : params.top_k; const float top_p = params.top_p; const float min_p = params.min_p; @@ -143,7 +145,15 @@ static void sampler_queue( case 'y': llama_sample_typical (ctx_main, &cur_p, typical_p, min_keep); break; case 'p': llama_sample_top_p (ctx_main, &cur_p, top_p, min_keep); break; case 'm': llama_sample_min_p (ctx_main, &cur_p, min_p, min_keep); break; - case 't': llama_sample_temp (ctx_main, &cur_p, temp); break; + case 't': + if (dynatemp_range > 0) { + float dynatemp_min = std::max(0.0f, temp - dynatemp_range); + float dynatemp_max = std::max(0.0f, temp + dynatemp_range); + llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max, dynatemp_exponent); + } else { + llama_sample_temp(ctx_main, &cur_p, temp); + } + break; default : break; } } |