summary | refs | log | tree | commit | diff
path: root/convert-starcoder-hf-to-gguf.py
diff options
context:
space:
mode:
author: Cebtenzzre <cebtenzzre@gmail.com> 2023-09-28 14:30:15 -0400
committer: GitHub <noreply@github.com> 2023-09-28 14:30:15 -0400
commit: ecf90b1a5114034bc0939b3968f549fe4d63cf6d (patch)
tree: edd1cbfc46d4be6484b77e22a27210cfedaf146a /convert-starcoder-hf-to-gguf.py
parent: 2619109ad57d7a75388a9cce51e5da645410d92e (diff)
gguf : make token scores and types optional (#3347)
Diffstat (limited to 'convert-starcoder-hf-to-gguf.py')
-rwxr-xr-x convert-starcoder-hf-to-gguf.py 6
1 files changed, 0 insertions, 6 deletions
diff --git a/convert-starcoder-hf-to-gguf.py b/convert-starcoder-hf-to-gguf.py
index 331e84e9..48e88a77 100755
--- a/convert-starcoder-hf-to-gguf.py
+++ b/convert-starcoder-hf-to-gguf.py
@@ -117,8 +117,6 @@ gguf_writer.add_file_type(ftype)
print("gguf: get tokenizer metadata")
tokens: list[bytearray] = []
-scores: list[float] = []
-toktypes: list[int] = []
tokenizer_json_file = dir_model / 'tokenizer.json'
if not tokenizer_json_file.is_file():
@@ -161,12 +159,8 @@ for i in range(vocab_size):
text = bytearray(pad_token)
tokens.append(text)
- scores.append(0.0) # dymmy
- toktypes.append(gguf.TokenType.NORMAL) # dummy
gguf_writer.add_token_list(tokens)
-gguf_writer.add_token_scores(scores)
-gguf_writer.add_token_types(toktypes)
special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
special_vocab.add_to_gguf(gguf_writer)