summary | refs | log | tree | commit | diff
path: root/convert-gpt4all-to-ggml.py
diff options
context:
space:
mode:
Diffstat (limited to 'convert-gpt4all-to-ggml.py')
-rw-r--r-- convert-gpt4all-to-ggml.py 6
1 files changed, 3 insertions, 3 deletions
diff --git a/convert-gpt4all-to-ggml.py b/convert-gpt4all-to-ggml.py
index f1d9d7ae..b1a5e056 100644
--- a/convert-gpt4all-to-ggml.py
+++ b/convert-gpt4all-to-ggml.py
@@ -49,7 +49,7 @@ def write_header(f_out, header):
def write_tokens(fout, tokenizer):
for i in range(tokenizer.vocab_size()):
if tokenizer.is_unknown(i):
- text = " \u2047 ".encode("utf-8")
+ text = " \u2047 ".encode()
elif tokenizer.is_control(i):
text = b""
elif tokenizer.is_byte(i):
@@ -60,13 +60,13 @@ def write_tokens(fout, tokenizer):
byte_value = int(piece[3:-1], 16)
text = struct.pack("B", byte_value)
else:
- text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
+ text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode()
fout.write(struct.pack("i", len(text)))
fout.write(text)
fout.write(struct.pack("f", tokenizer.get_score(i)))
# TODO: GPT4All - add extra <pad> token
- text = "<pad>".encode("utf-8")
+ text = "<pad>".encode()
fout.write(struct.pack("i", len(text)))
fout.write(text)
fout.write(struct.pack("f", 0.0))