diff options
author | Pavol Rusnak <pavol@rusnak.io> | 2023-03-29 21:31:24 +0200 |
---|---|---|
committer | Pavol Rusnak <pavol@rusnak.io> | 2023-03-31 10:32:01 +0200 |
commit | cbef542879962fdc491656cd0c8cadd65a5f1356 (patch) | |
tree | ba31f66c0613411466b31c822fb5bac2b24c910a /convert-gpt4all-to-ggml.py | |
parent | 9733104be5389ebb1ff05095eca2a70280cd875a (diff) |
py : cleanup the code
- use f-strings where possible
- drop first param of encode/decode functions since "utf-8" is the default
Diffstat (limited to 'convert-gpt4all-to-ggml.py')
-rw-r--r-- | convert-gpt4all-to-ggml.py | 6 |
1 file changed, 3 insertions, 3 deletions
diff --git a/convert-gpt4all-to-ggml.py b/convert-gpt4all-to-ggml.py
index f1d9d7ae..b1a5e056 100644
--- a/convert-gpt4all-to-ggml.py
+++ b/convert-gpt4all-to-ggml.py
@@ -49,7 +49,7 @@ def write_header(f_out, header):
 def write_tokens(fout, tokenizer):
     for i in range(tokenizer.vocab_size()):
         if tokenizer.is_unknown(i):
-            text = " \u2047 ".encode("utf-8")
+            text = " \u2047 ".encode()
         elif tokenizer.is_control(i):
             text = b""
         elif tokenizer.is_byte(i):
@@ -60,13 +60,13 @@ def write_tokens(fout, tokenizer):
             byte_value = int(piece[3:-1], 16)
             text = struct.pack("B", byte_value)
         else:
-            text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
+            text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode()
         fout.write(struct.pack("i", len(text)))
         fout.write(text)
         fout.write(struct.pack("f", tokenizer.get_score(i)))
 
     # TODO: GPT4All - add extra <pad> token
-    text = "<pad>".encode("utf-8")
+    text = "<pad>".encode()
     fout.write(struct.pack("i", len(text)))
     fout.write(text)
     fout.write(struct.pack("f", 0.0))