diff options
author | Jared Van Bortel <jared@nomic.ai> | 2024-01-20 18:14:18 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-20 18:14:18 -0500 |
commit | b43ebde3b0ccbc42d9dd782b32e2fd8eb35b43b5 (patch) | |
tree | 74759304364d6257681e092303c2e125b5113a6d /convert-llama-ggml-to-gguf.py | |
parent | 97c1549808d2742d37584a3c9df28154bdf34417 (diff) |
convert : partially revert PR #4818 (#5041)
Diffstat (limited to 'convert-llama-ggml-to-gguf.py')
-rwxr-xr-x | convert-llama-ggml-to-gguf.py | 14 |
1 files changed, 5 insertions, 9 deletions
```diff
diff --git a/convert-llama-ggml-to-gguf.py b/convert-llama-ggml-to-gguf.py
index e359330a..b3310806 100755
--- a/convert-llama-ggml-to-gguf.py
+++ b/convert-llama-ggml-to-gguf.py
@@ -2,6 +2,7 @@
 from __future__ import annotations
 
 import argparse
+import os
 import struct
 import sys
 from enum import IntEnum
@@ -9,7 +10,6 @@ from pathlib import Path
 
 import numpy as np
 
-import os
 if 'NO_LOCAL_GGUF' not in os.environ:
     sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
@@ -371,15 +371,11 @@ def handle_metadata(cfg, hp):
         params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
     else:
         raise ValueError('Unable to load metadata')
-    vocab = convert.load_vocab(
-        cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir,
-        cfg.vocabtype)
-    # FIXME: Respect cfg.vocab_dir?
-    svocab = gguf.SpecialVocab(cfg.model_metadata_dir,
-                               load_merges = cfg.vocabtype == 'bpe',
-                               n_vocab = vocab.vocab_size)
+    vocab_path = Path(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir)
+    vocab_factory = convert.VocabFactory(vocab_path)
+    vocab, special_vocab = vocab_factory.load_vocab(cfg.vocabtype, cfg.model_metadata_dir)
     convert.check_vocab_size(params, vocab)
-    return (params, vocab, svocab)
+    return params, vocab, special_vocab
 
 
 def handle_args():
```