llama : dynamic temperature sampling (#4972)

* implemented dynamic temperature sampling from koboldcpp * removed trailing whitespace * removed unused temp parameter in llama_sample_entropy * exposed exponent_val in dynamic temp sampler * added debug check for printf statements * use nullptr in llama_sample_softmax call during llama_sample_entropy; this avoids counting the time taken stats twice Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * return earlier if there is only 1 candidate (i.e. max_entropy == 0) * reformat 't' case in llama_sample_queue Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com> * check for one or zero candidates case in llama_sample_entropy --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
author: l3utterfly <gc.pthzfoldr@gmail.com> 2024-01-26 05:06:22 +0900
committer: GitHub <noreply@github.com> 2024-01-25 22:06:22 +0200
commit: 5eaf9964fc797d4585c214db32a463d557f3ed33 (patch)
tree: e41be4957a366e1210769298156de9f09b9593c8 /common/sampling.h
parent: d292f4f2047963f558dd516f1baaa71793e9acf2 (diff)
1 files changed, 2 insertions, 0 deletions
diff --git a/common/sampling.h b/common/sampling.h
index 2ee18037..88899c09 100644
--- a/common/sampling.h
+++ b/common/sampling.h
@@ -18,6 +18,8 @@ typedef struct llama_sampling_params {
     float       tfs_z                 = 1.00f;    // 1.0 = disabled
     float       typical_p             = 1.00f;    // 1.0 = disabled
     float       temp                  = 0.80f;    // <= 0.0 to sample greedily, 0.0 to not output probabilities
+    float       dynatemp_range        = 0.00f;    // 0.0 = disabled
+    float       dynatemp_exponent     = 1.00f;    // controls how entropy maps to temperature in dynamic temperature sampler
     int32_t     penalty_last_n        = 64;       // last n tokens to penalize (0 = disable penalty, -1 = context size)
     float       penalty_repeat        = 1.10f;    // 1.0 = disabled
     float       penalty_freq          = 0.00f;    // 0.0 = disabled
author	l3utterfly <gc.pthzfoldr@gmail.com>	2024-01-26 05:06:22 +0900
committer	GitHub <noreply@github.com>	2024-01-25 22:06:22 +0200
commit	5eaf9964fc797d4585c214db32a463d557f3ed33 (patch)
tree	e41be4957a366e1210769298156de9f09b9593c8 /common/sampling.h
parent	d292f4f2047963f558dd516f1baaa71793e9acf2 (diff)