diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2024-02-08 09:46:30 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-02-08 09:46:30 +0100 |
commit | 26d4efd11e48908e14e2ee9471a7fc4c57079a1d (patch) | |
tree | 04c16bf0611e416a0f5a792672f276a3cc55af7f | |
parent | 8504d2d0da8cc7a1f2eee0e9e56949f960510b75 (diff) |
sampling: fix top_k <= 0 (#5388)
* sampling: fix top_k <= 0
* Update llama.cpp
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
-rw-r--r-- | common/sampling.cpp | 2 | ||||
-rw-r--r-- | llama.cpp | 4 | ||||
-rw-r--r-- | tests/test-sampling.cpp | 2 |
3 files changed, 7 insertions, 1 deletions
```diff
diff --git a/common/sampling.cpp b/common/sampling.cpp
index e8675a8c..844ad7c5 100644
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -132,7 +132,7 @@ static void sampler_queue(
     const float temp = params.temp;
     const float dynatemp_range = params.dynatemp_range;
     const float dynatemp_exponent = params.dynatemp_exponent;
-    const int32_t top_k = params.top_k <= 0 ? n_vocab : params.top_k;
+    const int32_t top_k = params.top_k;
     const float top_p = params.top_p;
     const float min_p = params.min_p;
     const float tfs_z = params.tfs_z;
diff --git a/llama.cpp b/llama.cpp
--- a/llama.cpp
+++ b/llama.cpp
@@ -8585,6 +8585,10 @@ void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * can
     // }
 
     const int64_t t_start_sample_us = ggml_time_us();
+
+    if (k <= 0) {
+        k = candidates->size;
+    }
 
     k = std::max(k, (int) min_keep);
     k = std::min(k, (int) candidates->size);
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index c3b3d662..6374958f 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -235,6 +235,8 @@ int main(void) {
 
     test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 1);
     test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f}, 3);
+    test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 4);
+    test_top_k({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f, 0.2f, 0.1f}, 0);
 
     test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f}, 0);
     test_top_p({0.1f, 0.2f, 0.3f, 0.4f}, {0.4f, 0.3f}, 0.7f);
```