| author | compilade <git@compilade.net> | 2024-05-11 11:06:26 -0400 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-05-11 11:06:26 -0400 |
| commit | 5a419926b0c4efab0531401aea91522aaea9fd07 (patch) | |
| tree | fc04fa59a6588650a6fed70fedd8c1d4b39ec1d1 /gguf-py/gguf/gguf_writer.py | |
| parent | fae9d234b6606693704eca62fe4aefbb6c6abb45 (diff) | |
convert-hf : support bfloat16 conversion (#7158)
* convert-hf : support bfloat16 conversion (see the bf16 rounding sketch after this list)
* gguf-py : flake8 fixes
* convert-hf : add missing space after comma
* convert-hf : get bit-exact same output as ./quantize
The quantization version was missing.
* convert-hf : don't round bf16 NANs
* convert-hf : save some memory with np.int16 intermediate bf16 weights
* convert-hf : more closely match llama.cpp on which weights to keep in f32
* convert-hf : add --outtype auto-f16
This exists mainly for model quantizers who want an initial GGUF with the
highest fidelity to the original model while still using a 16-bit float
type instead of 32-bit floats.
* convert-hf : remove a semicolon because flake8 doesn't like it
It's a reflex from when programming in C/C++, I guess.
* convert-hf : support outtype templating in the outfile name (see the filename sketch after this list)
* convert-hf : rename --outtype auto-f16 to --outtype auto
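The bf16 bullets above come down to bit manipulation of the f32 representation: truncate to the top 16 bits with round-to-nearest-even, but never round a NaN (the rounding bias can carry a NaN's mantissa into an infinity), and keep the intermediate result in a 2-byte integer type. Below is a minimal sketch of such a conversion, assuming a float32 NumPy input; the helper name `fp32_to_bf16` is illustrative and not necessarily what the PR uses:

```python
import numpy as np

def fp32_to_bf16(arr: np.ndarray) -> np.ndarray:
    """Truncate f32 to bf16 bit patterns, preserving NaNs (a sketch)."""
    # Reinterpret the f32 bits as unsigned 32-bit integers.
    n = arr.astype(np.float32).view(np.uint32)
    # Force NaNs to quiet NaNs instead of rounding them:
    # any magnitude above the +inf bit pattern (0x7f800000) is a NaN.
    n = np.where((n & 0x7fffffff) > 0x7f800000, (n & 0xffff0000) | (64 << 16), n)
    # Flush f32 subnormals to signed zero; they are negligible in bf16.
    n = np.where((n & 0x7f800000) == 0, n & 0x80000000, n)
    # Round to nearest-even: add the rounding bias, then truncate.
    n = (n + (0x7fff + ((n >> 16) & 1))) >> 16
    # A 2-byte np.int16 intermediate halves the memory of the u32 view.
    return n.astype(np.int16)
```

For example, `fp32_to_bf16(np.array([1.0, float("nan")], dtype=np.float32))` keeps the second element a NaN, whereas naive rounding of a NaN like 0x7f800001 would add the bias, carry into the exponent, and truncate to +inf.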
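The outfile-templating bullet means the output type can be spliced into the user-given filename, which pairs well with `--outtype auto` where the final type is only known after inspecting the model. A minimal sketch of the idea using `str.format`-style placeholders; the placeholder names here are illustrative and may not match convert-hf-to-gguf.py exactly:

```python
def fill_templated_filename(filename: str, output_type: str) -> str:
    """Expand e.g. "model-{ftype}.gguf" into "model-bf16.gguf" (a sketch)."""
    return filename.format(ftype=output_type.lower(),
                           FTYPE=output_type.upper())
```

So `fill_templated_filename("llama-7b-{ftype}.gguf", "BF16")` yields `llama-7b-bf16.gguf`, and the same command line works unchanged for any `--outtype`.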
Diffstat (limited to 'gguf-py/gguf/gguf_writer.py')
-rw-r--r--  gguf-py/gguf/gguf_writer.py  51
1 file changed, 5 insertions(+), 46 deletions(-)
```diff
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index 8dcf9330..96574358 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -7,7 +7,7 @@ import struct
 import tempfile
 from enum import Enum, auto
 from io import BufferedWriter
-from typing import IO, Any, Callable, Sequence, Mapping
+from typing import IO, Any, Sequence, Mapping
 from string import ascii_letters, digits
 
 import numpy as np
@@ -28,47 +28,6 @@ from .constants import (
 logger = logging.getLogger(__name__)
 
 
-class LazyTensor:
-    data: Callable[[], np.ndarray[Any, Any]]
-    # to avoid too deep recursion
-    functions: list[Callable[[np.ndarray[Any, Any]], np.ndarray[Any, Any]]]
-    dtype: np.dtype[Any]
-    shape: tuple[int, ...]
-
-    def __init__(self, data: Callable[[], np.ndarray[Any, Any]], *, dtype: type, shape: tuple[int, ...]):
-        self.data = data
-        self.functions = []
-        self.dtype = np.dtype(dtype)
-        self.shape = shape
-
-    def astype(self, dtype: type, **kwargs) -> LazyTensor:
-        self.functions.append(lambda n: n.astype(dtype, **kwargs))
-        self.dtype = np.dtype(dtype)
-        return self
-
-    @property
-    def nbytes(self) -> int:
-        size = 1
-        for n in self.shape:
-            size *= n
-        return size * self.dtype.itemsize
-
-    def tofile(self, *args, **kwargs) -> None:
-        data = self.data()
-        for f in self.functions:
-            data = f(data)
-        assert data.shape == self.shape
-        assert data.dtype == self.dtype
-        assert data.nbytes == self.nbytes
-        self.functions = []
-        self.data = lambda: data
-        data.tofile(*args, **kwargs)
-
-    def byteswap(self, *args, **kwargs) -> LazyTensor:
-        self.functions.append(lambda n: n.byteswap(*args, **kwargs))
-        return self
-
-
 class WriterState(Enum):
     EMPTY = auto()
     HEADER = auto()
@@ -79,7 +38,7 @@ class WriterState(Enum):
 class GGUFWriter:
     fout: BufferedWriter
     temp_file: tempfile.SpooledTemporaryFile[bytes] | None
-    tensors: list[np.ndarray[Any, Any] | LazyTensor]
+    tensors: list[np.ndarray[Any, Any]]
     _simple_value_packing = {
         GGUFValueType.UINT8: "B",
         GGUFValueType.INT8: "b",
@@ -278,7 +237,7 @@ class GGUFWriter:
         self.ti_data_count += 1
 
     def add_tensor(
-        self, name: str, tensor: np.ndarray[Any, Any] | LazyTensor, raw_shape: Sequence[int] | None = None,
+        self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
         raw_dtype: GGMLQuantizationType | None = None,
     ) -> None:
         if self.endianess == GGUFEndian.BIG:
@@ -303,7 +262,7 @@ class GGUFWriter:
             if pad != 0:
                 fp.write(bytes([0] * pad))
 
-    def write_tensor_data(self, tensor: np.ndarray[Any, Any] | LazyTensor) -> None:
+    def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
         if self.state is not WriterState.TI_DATA:
             raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
 
@@ -391,7 +350,7 @@ class GGUFWriter:
     def add_name(self, name: str) -> None:
         self.add_string(Keys.General.NAME, name)
 
-    def add_quantization_version(self, quantization_version: GGMLQuantizationType) -> None:
+    def add_quantization_version(self, quantization_version: int) -> None:
         self.add_uint32(
             Keys.General.QUANTIZATION_VERSION, quantization_version)
```
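With the diff applied, `add_quantization_version` takes a plain integer and `add_tensor` only accepts eager NumPy arrays. A minimal usage sketch of the writer after this change, assuming the `GGML_QUANT_VERSION` constant from `gguf.constants` (2 at the time of this commit):

```python
import numpy as np
from gguf import GGUFWriter
from gguf.constants import GGML_QUANT_VERSION

writer = GGUFWriter("model.gguf", arch="llama")
writer.add_name("example")
# The missing quantization version key was what kept convert-hf
# output from being bit-exact with ./quantize.
writer.add_quantization_version(GGML_QUANT_VERSION)
writer.add_tensor("token_embd.weight", np.zeros((8, 8), dtype=np.float32))

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()
```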