diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2024-01-28 09:35:14 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-28 09:35:14 +0100 |
commit | b2b2bf988c098851b4f3831f0cf38394bff75121 (patch) | |
tree | ee9f51249f8a6eb62b8b5a73b0263dd546780728 /llama.cpp | |
parent | af4980bfedfd8df43b9e4cd1442895e85fee37bc (diff) |
Tests for min_p, sampling queue (#5147)
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 5 |
1 file changed, 5 insertions, 0 deletions
@@ -8133,6 +8133,11 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c
 }
 
 void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int32_t k, size_t min_keep) {
+    // TODO: move bucket sort to separate function so that top_p/tail_free/typical/softmax first is equally fast
+    // if (k >= (int32_t)candidates->size) {
+    //     return;
+    // }
+
     const int64_t t_start_sample_us = ggml_time_us();
 
     k = std::max(k, (int) min_keep);