summary | refs | log | tree | commit | diff
path: root/llama.h
diff options
context:
space:
mode:
author: l3utterfly <gc.pthzfoldr@gmail.com> 2024-01-26 05:06:22 +0900
committer: GitHub <noreply@github.com> 2024-01-25 22:06:22 +0200
commit: 5eaf9964fc797d4585c214db32a463d557f3ed33 (patch)
tree: e41be4957a366e1210769298156de9f09b9593c8 /llama.h
parent: d292f4f2047963f558dd516f1baaa71793e9acf2 (diff)
llama : dynamic temperature sampling (#4972)
* implemented dynamic temperature sampling from koboldcpp * removed trailing whitespace * removed unused temp parameter in llama_sample_entropy * exposed exponent_val in dynamic temp sampler * added debug check for printf statements * use nullptr in llama_sample_softmax call during llama_sample_entropy; this avoids counting the time taken stats twice Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * return earlier if there is only 1 candidate (i.e. max_entropy == 0) * reformat 't' case in llama_sample_queue Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * check for one or zero candidates case in llama_sample_entropy --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h 8
1 files changed, 8 insertions, 0 deletions
diff --git a/llama.h b/llama.h
index bb605455..7b3634aa 100644
--- a/llama.h
+++ b/llama.h
@@ -775,6 +775,14 @@ extern "C" {
float p,
size_t min_keep);
+ /// @details Dynamic temperature implementation described in the paper https://arxiv.org/abs/2309.02772. NOTE(review): min_temp / max_temp presumably bound the temperature that gets applied, and exponent_val presumably shapes the entropy-to-temperature mapping -- confirm against the implementation. The accompanying commit notes mention an early return when there are one or zero candidates (max_entropy == 0).
+ LLAMA_API void llama_sample_entropy(
+ struct llama_context * ctx,
+ llama_token_data_array * candidates_p,
+ float min_temp,
+ float max_temp,
+ float exponent_val);
+
LLAMA_API void llama_sample_temp(
struct llama_context * ctx,
llama_token_data_array * candidates,