`grammars`: fix resampling logic regression (#7424)

author: Olivier Chafik <ochafik@users.noreply.github.com> 2024-05-21 20:40:00 +0100
committer: GitHub <noreply@github.com> 2024-05-21 20:40:00 +0100
commit: e402de364b643cb89ea9f43057733b5d36298670 (patch)
tree: 0c3b1d54bc5def33eb553182955260eee37908f6 /examples/main/main.cpp
parent: fcf6538ba6702c55eaec70da9a75c81d04900a72 (diff)
1 file changed, 2 insertions, 2 deletions
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 9dee4100..832b51ee 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -707,7 +707,7 @@ int main(int argc, char ** argv) {
 
             const llama_token id = llama_sampling_sample(ctx_sampling, ctx, ctx_guidance);
 
-            llama_sampling_accept(ctx_sampling, ctx, id, true);
+            llama_sampling_accept(ctx_sampling, ctx, id, /* apply_grammar= */ true);
 
             LOG("last: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, ctx_sampling->prev).c_str());
 
@@ -728,7 +728,7 @@ int main(int argc, char ** argv) {
 
                 // push the prompt in the sampling context in order to apply repetition penalties later
                 // for the prompt, we don't apply grammar rules
-                llama_sampling_accept(ctx_sampling, ctx, embd_inp[n_consumed], false);
+                llama_sampling_accept(ctx_sampling, ctx, embd_inp[n_consumed], /* apply_grammar= */ false);
 
                 ++n_consumed;
                 if ((int) embd.size() >= params.n_batch) {
author	Olivier Chafik <ochafik@users.noreply.github.com>	2024-05-21 20:40:00 +0100
committer	GitHub <noreply@github.com>	2024-05-21 20:40:00 +0100
commit	e402de364b643cb89ea9f43057733b5d36298670 (patch)
tree	0c3b1d54bc5def33eb553182955260eee37908f6 /examples/main/main.cpp
parent	fcf6538ba6702c55eaec70da9a75c81d04900a72 (diff)