diff options
author | Galunid <karolek1231456@gmail.com> | 2024-05-30 02:10:40 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-30 02:10:40 +0200 |
commit | eb57fee51f7b4d78039f003249873c2eb46f12f6 (patch) | |
tree | 448d7476083ab56bc501b1ce771bdc928d6a4d8c | |
parent | 55d62262a99cd8bc28a1492975791fe433c8cc0f (diff) |
gguf-py : Add tokenizer.ggml.pre to gguf-new-metadata.py (#7627)
-rwxr-xr-x | gguf-py/scripts/gguf-new-metadata.py | 4 |
1 files changed, 4 insertions, 0 deletions
```diff
diff --git a/gguf-py/scripts/gguf-new-metadata.py b/gguf-py/scripts/gguf-new-metadata.py
index c9f1927f..21e91180 100755
--- a/gguf-py/scripts/gguf-new-metadata.py
+++ b/gguf-py/scripts/gguf-new-metadata.py
@@ -144,6 +144,7 @@ def main() -> None:
     parser.add_argument("--general-description", type=str, help="The models general.description", metavar='"Description ..."')
     parser.add_argument("--chat-template", type=str, help="Chat template string (or JSON string containing templates)", metavar='"{% ... %} ..."')
     parser.add_argument("--chat-template-config", type=Path, help="Config file containing chat template(s)", metavar='tokenizer_config.json')
+    parser.add_argument("--pre-tokenizer", type=str, help="The models tokenizer.ggml.pre", metavar='"pre tokenizer"')
     parser.add_argument("--remove-metadata", action="append", type=str, help="Remove metadata (by key name) from output model", metavar='general.url')
     parser.add_argument("--special-token", action="append", type=str, help="Special token by value", nargs=2, metavar=(' | '.join(token_names.keys()), '"<token>"'))
     parser.add_argument("--special-token-by-id", action="append", type=str, help="Special token by id", nargs=2, metavar=(' | '.join(token_names.keys()), '0'))
@@ -172,6 +173,9 @@ def main() -> None:
     if template:
         new_metadata[gguf.Keys.Tokenizer.CHAT_TEMPLATE] = MetadataDetails(gguf.GGUFValueType.STRING, template)
 
+    if args.pre_tokenizer:
+        new_metadata[gguf.Keys.Tokenizer.PRE] = MetadataDetails(gguf.GGUFValueType.STRING, args.pre_tokenizer)
+
     if remove_metadata:
         logger.warning('*** Warning *** Warning *** Warning **')
         logger.warning('* Most metadata is required for a fully functional GGUF file,')
```