summary | refs | log | tree | commit | diff
path: root/convert-llama-ggml-to-gguf.py
diff options
context:
space:
mode:
author: Jared Van Bortel <jared@nomic.ai>, 2024-01-20 18:14:18 -0500
committer: GitHub <noreply@github.com>, 2024-01-20 18:14:18 -0500
commit: b43ebde3b0ccbc42d9dd782b32e2fd8eb35b43b5 (patch)
tree: 74759304364d6257681e092303c2e125b5113a6d /convert-llama-ggml-to-gguf.py
parent: 97c1549808d2742d37584a3c9df28154bdf34417 (diff)
convert : partially revert PR #4818 (#5041)
Diffstat (limited to 'convert-llama-ggml-to-gguf.py')
-rwxr-xr-x  convert-llama-ggml-to-gguf.py  14
1 files changed, 5 insertions, 9 deletions
diff --git a/convert-llama-ggml-to-gguf.py b/convert-llama-ggml-to-gguf.py
index e359330a..b3310806 100755
--- a/convert-llama-ggml-to-gguf.py
+++ b/convert-llama-ggml-to-gguf.py
@@ -2,6 +2,7 @@
from __future__ import annotations
import argparse
+import os
import struct
import sys
from enum import IntEnum
@@ -9,7 +10,6 @@ from pathlib import Path
import numpy as np
-import os
if 'NO_LOCAL_GGUF' not in os.environ:
sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf
@@ -371,15 +371,11 @@ def handle_metadata(cfg, hp):
params = convert.Params.loadOriginalParamsJson(fakemodel, orig_config_path)
else:
raise ValueError('Unable to load metadata')
- vocab = convert.load_vocab(
- cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir,
- cfg.vocabtype)
- # FIXME: Respect cfg.vocab_dir?
- svocab = gguf.SpecialVocab(cfg.model_metadata_dir,
- load_merges = cfg.vocabtype == 'bpe',
- n_vocab = vocab.vocab_size)
+ vocab_path = Path(cfg.vocab_dir if cfg.vocab_dir is not None else cfg.model_metadata_dir)
+ vocab_factory = convert.VocabFactory(vocab_path)
+ vocab, special_vocab = vocab_factory.load_vocab(cfg.vocabtype, cfg.model_metadata_dir)
convert.check_vocab_size(params, vocab)
- return (params, vocab, svocab)
+ return params, vocab, special_vocab
def handle_args():