From 777f42ba18b29f25c71ff8de3ecf97b8017304c0 Mon Sep 17 00:00:00 2001 From: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> Date: Tue, 22 Aug 2023 17:39:39 -0600 Subject: Improve handling of special tokens in GGML to GGUF converter (#2725) * Improve UNK, BOS, EOS token handling when converting without metadata. * Allow importing as a module. * Remove some obsolete code and minor cleanups. * Set default UNK token mapping from -1 to 0 in llama.cpp * Try to handle overflow due to buggy Windows Python with a better error message --- llama.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'llama.cpp') diff --git a/llama.cpp b/llama.cpp index 6c5da130..fd8eaa18 100644 --- a/llama.cpp +++ b/llama.cpp @@ -703,7 +703,7 @@ struct llama_vocab { // default LLaMA special tokens id special_bos_id = 1; id special_eos_id = 2; - id special_unk_id = -1; + id special_unk_id = 0; id special_sep_id = -1; id special_pad_id = -1; -- cgit v1.2.3