From 8cf19d60dc93809db8e51fedc811595eed9134c5 Mon Sep 17 00:00:00 2001 From: Qin Yue Chen <71813199+chenqiny@users.noreply.github.com> Date: Fri, 20 Oct 2023 06:19:40 -0500 Subject: gguf : support big endian platform (#3552) * check whether platform is s390x; if yes -> do not import immintrin.h * support s390x big endian * support --bigendian option for s390x 1. verified with baichuan7b-chat with float 16 on s390x 2. verified with baichuan7b-chat 3. verified with chinese-alpaca-2-13b-f16 * update format based on editor-config checker result * Update convert-baichuan-hf-to-gguf.py * 1. check in ggml.c if endianness does not match 2. update GGUF version 3. change get_pack_prefix to property 4. update information log * always use "GGUF" as the beginning of the GGUF file * Compare "GGUF" with file header char by char 1. Set GGUF_MAGIC to "GGUF" string instead of int value 2. Compare "GGUF" char by char to ensure its byte order 3. Move bytes swap code from convert.py to gguf.py write_tensor_data --------- Co-authored-by: Georgi Gerganov --- gguf-py/gguf/gguf.py | 73 +++++++++++++++++++++++++++++++------------------- gguf-py/pyproject.toml | 2 +- 2 files changed, 47 insertions(+), 28 deletions(-) (limited to 'gguf-py') diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py index 557ce7ac..072c839c 100644 --- a/gguf-py/gguf/gguf.py +++ b/gguf-py/gguf/gguf.py @@ -19,9 +19,10 @@ import numpy as np # GGUF_MAGIC = 0x46554747 -GGUF_VERSION = 2 +GGUF_VERSION = 3 GGUF_DEFAULT_ALIGNMENT = 32 + # general KEY_GENERAL_ARCHITECTURE = "general.architecture" KEY_GENERAL_QUANTIZATION_VERSION = "general.quantization_version" @@ -597,6 +598,10 @@ class GGMLQuantizationType(IntEnum): Q6_K = 14 Q8_K = 15 +class GGUFEndian(IntEnum): + LITTLE = 0 + BIG = 1 + class GGUFValueType(IntEnum): UINT8 = 0 @@ -644,18 +649,41 @@ class GGUFWriter: temp_file: tempfile.SpooledTemporaryFile[bytes] | None = None tensors: list[tuple[np.ndarray[Any, Any], int]] - def __init__(self, path: os.PathLike[str] | str, arch: str, 
use_temp_file = True): + @property + def pack_prefix(self): + if self.endianess==GGUFEndian.LITTLE: + return "<" + else: + return ">" + + def __init__(self, path: os.PathLike[str] | str, arch: str, use_temp_file = True, endianess=GGUFEndian.LITTLE): self.fout = open(path, "wb") self.arch = arch + self.endianess = endianess + self._simple_value_packing = { + GGUFValueType.UINT8: f"{self.pack_prefix}B", + GGUFValueType.INT8: f"{self.pack_prefix}b", + GGUFValueType.UINT16: f"{self.pack_prefix}H", + GGUFValueType.INT16: f"{self.pack_prefix}h", + GGUFValueType.UINT32: f"{self.pack_prefix}I", + GGUFValueType.INT32: f"{self.pack_prefix}i", + GGUFValueType.FLOAT32: f"{self.pack_prefix}f", + GGUFValueType.UINT64: f"{self.pack_prefix}Q", + GGUFValueType.INT64: f"{self.pack_prefix}q", + GGUFValueType.FLOAT64: f"{self.pack_prefix}d", + GGUFValueType.BOOL: "?" , + } self.add_architecture() self.use_temp_file = use_temp_file self.tensors = [] + endianess_str = "Big Endian" if self.endianess == GGUFEndian.BIG else "Little Endian" + print(f"This gguf file is for {endianess_str} only") def write_header_to_file(self): self.fout.write(struct.pack(" 0: ltype = GGUFValueType.get_type(val[0]) if not all(GGUFValueType.get_type(i) is ltype for i in val[1:]): raise ValueError("All items in a GGUF array should be of the same type") - self.kv_data += struct.pack(""] packages = [ -- cgit v1.2.3