From 91f6499393d2d999331fbfdba47a7f8b9f913f0d Mon Sep 17 00:00:00 2001 From: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> Date: Thu, 16 Nov 2023 19:14:37 -0700 Subject: Respect tokenizer.ggml.add_bos_token value when tokenizing (#4040) * gguf-py: gguf-dump: Respect --no-tensor flag in JSON mode. * Respect add_bos_token GGUF metadata value * gguf-py: Try to fix SpecialVocab giving up too easily for the Nth time --- examples/main/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'examples/main') diff --git a/examples/main/main.cpp b/examples/main/main.cpp index 8d985c82..99d219d6 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -229,7 +229,7 @@ int main(int argc, char ** argv) { } } - const bool add_bos = llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM; + const bool add_bos = llama_should_add_bos_token(model); LOG("add_bos: %d\n", add_bos); std::vector<llama_token> embd_inp; -- cgit v1.2.3