author    Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>    2023-11-16 19:14:37 -0700
committer GitHub <noreply@github.com>    2023-11-16 19:14:37 -0700
commit    91f6499393d2d999331fbfdba47a7f8b9f913f0d (patch)
tree      27caf3ad0b9cec979bb5ed3317b5334bdcd9470c /examples/perplexity/perplexity.cpp
parent    8da46278e1a57107591653275f8e03a281de94f0 (diff)
Respect tokenizer.ggml.add_bos_token value when tokenizing (#4040)
* gguf-py: gguf-dump: Respect --no-tensor flag in JSON mode.
* Respect add_bos_token GGUF metadata value
* gguf-py: Try to fix SpecialVocab giving up too easily for the Nth time
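The behavioral change in this file: instead of hard-coding "SentencePiece vocab implies a BOS token", the scorers now ask a helper that respects the tokenizer.ggml.add_bos_token GGUF metadata value when the model provides one. Below is a minimal self-contained sketch of that decision, using stand-in types rather than the real llama.cpp API; the fallback to the SPM heuristic when the key is absent is an assumption about how missing metadata is handled, consistent with the old behavior this commit replaces.

    #include <optional>

    enum class vocab_type { SPM, BPE };

    // Stand-in for the tokenizer metadata carried by a GGUF model file;
    // tokenizer.ggml.add_bos_token is optional, so it may be absent.
    struct model_meta {
        std::optional<bool> add_bos_token;
        vocab_type vocab;
    };

    // The decision this commit centralizes: an explicit metadata value wins,
    // otherwise fall back to the legacy "SPM vocab => prepend BOS" heuristic.
    static bool should_add_bos_token(const model_meta & model) {
        if (model.add_bos_token.has_value()) {
            return *model.add_bos_token;
        }
        return model.vocab == vocab_type::SPM;
    }

    int main() {
        const model_meta legacy  = { std::nullopt, vocab_type::SPM }; // no key: BOS added as before
        const model_meta opt_out = { false,        vocab_type::SPM }; // key says no: BOS suppressed
        return (should_add_bos_token(legacy) && !should_add_bos_token(opt_out)) ? 0 : 1;
    }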
Diffstat (limited to 'examples/perplexity/perplexity.cpp')
-rw-r--r--  examples/perplexity/perplexity.cpp | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index de60c522..9a77beca 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -149,8 +149,7 @@ static results_perplexity perplexity_v2(llama_context * ctx, const gpt_params &
     // Output: `perplexity: 13.5106 [114/114]`
     // BOS tokens will be added for each chunk before eval
 
-    const bool is_spm = llama_vocab_type(llama_get_model(ctx)) == LLAMA_VOCAB_TYPE_SPM;
-    const bool add_bos = is_spm;
+    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
 
     fprintf(stderr, "%s: tokenizing the input ..\n", __func__);
 
@@ -288,8 +287,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
     // Output: `perplexity: 13.5106 [114/114]`
     // BOS tokens will be added for each chunk before eval
 
-    const bool is_spm = llama_vocab_type(llama_get_model(ctx)) == LLAMA_VOCAB_TYPE_SPM;
-    const bool add_bos = is_spm;
+    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
     const int n_ctx = llama_n_ctx(ctx);
 
     auto tim1 = std::chrono::high_resolution_clock::now();
@@ -481,7 +479,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
     fprintf(stderr, "================================= is_spm = %d\n", is_spm);
 
     // This is needed as usual for LLaMA models
-    const bool add_bos = is_spm;
+    const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
 
     // Number of tasks to use when computing the score
     if ( params.hellaswag_tasks < hs_task_count ) {
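Downstream of each hunk, the computed flag feeds directly into tokenization of the prompt; the call in this file has roughly the shape below (the ::llama_tokenize wrapper signature is assumed from llama.cpp's common helpers of this era, shown only to indicate where add_bos ends up):

    // add_bos now reflects tokenizer.ggml.add_bos_token when present,
    // rather than the hard-coded SPM heuristic.
    std::vector<llama_token> tokens = ::llama_tokenize(ctx, params.prompt, add_bos);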