From 777f42ba18b29f25c71ff8de3ecf97b8017304c0 Mon Sep 17 00:00:00 2001
From: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
Date: Tue, 22 Aug 2023 17:39:39 -0600
Subject: Improve handling of special tokens in GGML to GGUF converter (#2725)

* Improve UNK, BOS, EOS token handling when converting without metadata.

* Allow importing as a module.

* Remove some obsolete code and make minor cleanups.

* Change the default UNK token ID from -1 to 0 in llama.cpp

* Try to handle the overflow caused by buggy Windows Python by giving a better error message
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index 6c5da130..fd8eaa18 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -703,7 +703,7 @@ struct llama_vocab {
     // default LLaMA special tokens
     id special_bos_id = 1;
     id special_eos_id = 2;
-    id special_unk_id = -1;
+    id special_unk_id = 0;
     id special_sep_id = -1;
     id special_pad_id = -1;
 
-- 
cgit v1.2.3