summaryrefslogtreecommitdiff
path: root/convert.py
diff options
context:
space:
mode:
Diffstat (limited to 'convert.py')
-rwxr-xr-xconvert.py16
1 files changed, 9 insertions, 7 deletions
diff --git a/convert.py b/convert.py
index b0f44dbe..a4b87e08 100755
--- a/convert.py
+++ b/convert.py
@@ -3,11 +3,9 @@ from __future__ import annotations
import argparse
import concurrent.futures
-import copy
import enum
import faulthandler
import functools
-import io
import itertools
import json
import math
@@ -23,14 +21,14 @@ from abc import ABCMeta, abstractmethod
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
from dataclasses import dataclass
from pathlib import Path
-from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar
+from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar
import numpy as np
from sentencepiece import SentencePieceProcessor
import os
if 'NO_LOCAL_GGUF' not in os.environ:
- sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+ sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
import gguf
if TYPE_CHECKING:
@@ -851,7 +849,7 @@ class OutputFile:
elif isinstance(vocab, BpeVocab):
self.gguf.add_tokenizer_model("gpt2")
else:
- raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
+ raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
self.gguf.add_token_list(tokens)
self.gguf.add_token_scores(scores)
self.gguf.add_token_types(toktypes)
@@ -905,7 +903,7 @@ class OutputFile:
return dt.quantize(arr)
@staticmethod
- def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
+ def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
check_vocab_size(params, vocab)
of = OutputFile(fname_out, endianess=endianess)
@@ -1114,11 +1112,15 @@ def do_dump_model(model_plus: ModelPlus) -> None:
def main(args_in: list[str] | None = None) -> None:
+ output_choices = ["f32", "f16"]
+ if np.uint32(1) == np.uint32(1).newbyteorder("<"):
+ # We currently only support Q8_0 output on little endian systems.
+ output_choices.append("q8_0")
parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
- parser.add_argument("--outtype", choices=["f32", "f16", "q8_0"], help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
+ parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input")
parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")