summary | refs | log | tree | commit | diff
path: root/gguf-py
diff options
context:
space:
mode:
Diffstat (limited to 'gguf-py')
-rw-r--r-- gguf-py/gguf/constants.py 9
-rw-r--r-- gguf-py/gguf/gguf_writer.py 12
2 files changed, 21 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 2566b2fb..1358206a 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -90,6 +90,11 @@ class Keys:
HF_JSON = "tokenizer.huggingface.json"
RWKV = "tokenizer.rwkv.world"
CHAT_TEMPLATE = "tokenizer.chat_template"
+ # FIM/Infill special token ID constants
+ PREFIX_ID = "tokenizer.ggml.prefix_token_id"
+ SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
+ MIDDLE_ID = "tokenizer.ggml.middle_token_id"
+ EOT_ID = "tokenizer.ggml.eot_token_id"
#
@@ -885,3 +890,7 @@ KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV
+KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
+KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
+KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
+KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index f4c44076..ff9326d5 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -469,6 +469,18 @@ class GGUFWriter:
def add_chat_template(self, value: str) -> None:
self.add_string(Keys.Tokenizer.CHAT_TEMPLATE, value)
+ def add_prefix_token_id(self, id: int) -> None:
+ self.add_uint32(Keys.Tokenizer.PREFIX_ID, id)
+
+ def add_suffix_token_id(self, id: int) -> None:
+ self.add_uint32(Keys.Tokenizer.SUFFIX_ID, id)
+
+ def add_middle_token_id(self, id: int) -> None:
+ self.add_uint32(Keys.Tokenizer.MIDDLE_ID, id)
+
+ def add_eot_token_id(self, id: int) -> None:
+ self.add_uint32(Keys.Tokenizer.EOT_ID, id)
+
def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
pack_prefix = ''
if not skip_pack_prefix: