diff options
Diffstat (limited to 'convert.py')
-rwxr-xr-x | convert.py | 16 |
1 files changed, 9 insertions, 7 deletions
@@ -3,11 +3,9 @@ from __future__ import annotations import argparse import concurrent.futures -import copy import enum import faulthandler import functools -import io import itertools import json import math @@ -23,14 +21,14 @@ from abc import ABCMeta, abstractmethod from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path -from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar +from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar import numpy as np from sentencepiece import SentencePieceProcessor import os if 'NO_LOCAL_GGUF' not in os.environ: - sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf')) + sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) import gguf if TYPE_CHECKING: @@ -851,7 +849,7 @@ class OutputFile: elif isinstance(vocab, BpeVocab): self.gguf.add_tokenizer_model("gpt2") else: - raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab') + raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab') self.gguf.add_token_list(tokens) self.gguf.add_token_scores(scores) self.gguf.add_token_types(toktypes) @@ -905,7 +903,7 @@ class OutputFile: return dt.quantize(arr) @staticmethod - def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None: + def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None: check_vocab_size(params, vocab) of = OutputFile(fname_out, endianess=endianess) @@ -1114,11 +1112,15 @@ def do_dump_model(model_plus: ModelPlus) -> None: def main(args_in: list[str] | None = None) -> None: + output_choices = ["f32", "f16"] + if np.uint32(1) == np.uint32(1).newbyteorder("<"): + # We currently only support Q8_0 output on little endian systems. + output_choices.append("q8_0") parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file") parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model") parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file") parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab") - parser.add_argument("--outtype", choices=["f32", "f16", "q8_0"], help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)") + parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)") parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file") parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input") parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)") |