summary | refs | log | tree | commit | diff
path: root/examples/speculative/speculative.cpp
diff options
context:
space:
mode:
author: Minsoo Cheong <54794500+mscheong01@users.noreply.github.com> 2024-03-24 17:54:07 +0900
committer: GitHub <noreply@github.com> 2024-03-24 10:54:07 +0200
commit: 586e7bc561be88e929a9afca7e67d8ead00c53bd (patch)
tree: 7cf1ce22a8b8ea7d711662947fc698e1718905d3 /examples/speculative/speculative.cpp
parent: ddf65685105a39a57b1e7f80c3aa502a6313af24 (diff)
sampling : deduplicated code for probability distribution access (#6240)
* sampling: remove duplicated code for probability distribution access
* free original_logits
* fix original_logits allocation
* fixes based on review @cebtenzzre
* change function name to `llama_sampling_prepare`
Diffstat (limited to 'examples/speculative/speculative.cpp')
-rw-r--r-- examples/speculative/speculative.cpp 3
1 files changed, 2 insertions, 1 deletions
diff --git a/examples/speculative/speculative.cpp b/examples/speculative/speculative.cpp
index e991b884..8b31b678 100644
--- a/examples/speculative/speculative.cpp
+++ b/examples/speculative/speculative.cpp
@@ -219,7 +219,8 @@ int main(int argc, char ** argv) {
if (params.sparams.temp > 0) {
// stochastic verification
- llama_token_data_array dist_tgt = llama_sampling_probability_distribution(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft]);
+ llama_token_data_array dist_tgt = llama_sampling_prepare(ctx_sampling, ctx_tgt, NULL, drafts[s_keep].i_batch_tgt[i_dft], true, NULL);
+ llama_sample_softmax(ctx_tgt, &dist_tgt);
float p_tgt = 0, p_dft = 0;
// GGML_ASSERT(dist_tgt.size() == dist_dft.size());