Diffstat (limited to 'gguf-py')
-rw-r--r--   gguf-py/gguf/constants.py              2
-rw-r--r--   gguf-py/gguf/gguf_reader.py            8
-rw-r--r--   gguf-py/gguf/gguf_writer.py           77
-rw-r--r--   gguf-py/gguf/vocab.py                  6
-rw-r--r--   gguf-py/pyproject.toml                 1
-rwxr-xr-x   gguf-py/scripts/gguf-dump.py           2
-rw-r--r--   gguf-py/scripts/gguf-new-metadata.py  12
7 files changed, 84 insertions(+), 24 deletions(-)
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 6e968fc4..5951c0bb 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -860,7 +860,7 @@ class GGUFValueType(IntEnum):
# Note: Does not support GGML_QKK_64
QK_K = 256
# Items here are (block size, type size)
-GGML_QUANT_SIZES = {
+GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
GGMLQuantizationType.F32: (1, 4),
GGMLQuantizationType.F16: (1, 2),
GGMLQuantizationType.Q4_0: (32, 2 + 16),
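
These (block size, type size) pairs are what the reader below uses to turn an element count into a byte count. A minimal sketch of that arithmetic for Q4_0, assuming the gguf package from this tree is importable (the tensor size is an arbitrary example):

from gguf.constants import GGML_QUANT_SIZES, GGMLQuantizationType

# Q4_0 packs 32 elements into 2 + 16 bytes.
block_size, type_size = GGML_QUANT_SIZES[GGMLQuantizationType.Q4_0]
n_elems = 4096 * 4096                        # arbitrary example tensor
n_bytes = n_elems * type_size // block_size
print(block_size, type_size, n_bytes)        # 32 18 9437184
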
diff --git a/gguf-py/gguf/gguf_reader.py b/gguf-py/gguf/gguf_reader.py
index db8525d8..21b089f8 100644
--- a/gguf-py/gguf/gguf_reader.py
+++ b/gguf-py/gguf/gguf_reader.py
@@ -65,7 +65,7 @@ class ReaderTensor(NamedTuple):
class GGUFReader:
# I - same as host, S - swapped
- byte_order: Literal['I' | 'S'] = 'I'
+ byte_order: Literal['I'] | Literal['S'] = 'I'
alignment: int = GGUF_DEFAULT_ALIGNMENT
# Note: Internal helper, API may change.
@@ -83,7 +83,7 @@ class GGUFReader:
GGUFValueType.BOOL: np.bool_,
}
- def __init__(self, path: os.PathLike[str] | str, mode: Literal['r' | 'r+' | 'c'] = 'r'):
+ def __init__(self, path: os.PathLike[str] | str, mode: Literal['r'] | Literal['r+'] | Literal['c'] = 'r'):
self.data = np.memmap(path, mode = mode)
offs = 0
if self._get(offs, np.uint32, override_order = '<')[0] != GGUF_MAGIC:
@@ -128,7 +128,7 @@ class GGUFReader:
return self.tensors[idx]
def _get(
- self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I' | 'S' | '<'] = None,
+ self, offset: int, dtype: npt.DTypeLike, count: int = 1, override_order: None | Literal['I'] | Literal['S'] | Literal['<'] = None,
) -> npt.NDArray[Any]:
count = int(count)
itemsize = int(np.empty([], dtype = dtype).itemsize)
@@ -250,7 +250,7 @@ class GGUFReader:
raise ValueError(f'Found duplicated tensor with name {tensor_name}')
tensor_names.add(tensor_name)
ggml_type = GGMLQuantizationType(raw_dtype[0])
- n_elems = np.prod(dims)
+ n_elems = int(np.prod(dims))
block_size, type_size = GGML_QUANT_SIZES[ggml_type]
n_bytes = n_elems * type_size // block_size
data_offs = int(start_offs + offset_tensor[0])
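
The Literal rewrites above only affect type checkers; at runtime GGUFReader still passes mode straight through to np.memmap. A minimal usage sketch, with a placeholder file name:

from gguf.gguf_reader import GGUFReader

# 'r' maps the file read-only, 'r+' read-write, 'c' copy-on-write.
reader = GGUFReader("model.gguf", mode="r")
for tensor in reader.tensors:
    # n_elements is now a plain Python int thanks to int(np.prod(dims))
    print(tensor.name, tensor.tensor_type.name, tensor.n_elements, tensor.n_bytes)
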
diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py
index d9cfbf71..8dcf9330 100644
--- a/gguf-py/gguf/gguf_writer.py
+++ b/gguf-py/gguf/gguf_writer.py
@@ -7,7 +7,7 @@ import struct
import tempfile
from enum import Enum, auto
from io import BufferedWriter
-from typing import IO, Any, Sequence, Mapping
+from typing import IO, Any, Callable, Sequence, Mapping
from string import ascii_letters, digits
import numpy as np
@@ -28,6 +28,47 @@ from .constants import (
logger = logging.getLogger(__name__)
+class LazyTensor:
+ data: Callable[[], np.ndarray[Any, Any]]
+ # to avoid too deep recursion
+ functions: list[Callable[[np.ndarray[Any, Any]], np.ndarray[Any, Any]]]
+ dtype: np.dtype[Any]
+ shape: tuple[int, ...]
+
+ def __init__(self, data: Callable[[], np.ndarray[Any, Any]], *, dtype: type, shape: tuple[int, ...]):
+ self.data = data
+ self.functions = []
+ self.dtype = np.dtype(dtype)
+ self.shape = shape
+
+ def astype(self, dtype: type, **kwargs) -> LazyTensor:
+ self.functions.append(lambda n: n.astype(dtype, **kwargs))
+ self.dtype = np.dtype(dtype)
+ return self
+
+ @property
+ def nbytes(self) -> int:
+ size = 1
+ for n in self.shape:
+ size *= n
+ return size * self.dtype.itemsize
+
+ def tofile(self, *args, **kwargs) -> None:
+ data = self.data()
+ for f in self.functions:
+ data = f(data)
+ assert data.shape == self.shape
+ assert data.dtype == self.dtype
+ assert data.nbytes == self.nbytes
+ self.functions = []
+ self.data = lambda: data
+ data.tofile(*args, **kwargs)
+
+ def byteswap(self, *args, **kwargs) -> LazyTensor:
+ self.functions.append(lambda n: n.byteswap(*args, **kwargs))
+ return self
+
+
class WriterState(Enum):
EMPTY = auto()
HEADER = auto()
@@ -38,7 +79,7 @@ class WriterState(Enum):
class GGUFWriter:
fout: BufferedWriter
temp_file: tempfile.SpooledTemporaryFile[bytes] | None
- tensors: list[np.ndarray[Any, Any]]
+ tensors: list[np.ndarray[Any, Any] | LazyTensor]
_simple_value_packing = {
GGUFValueType.UINT8: "B",
GGUFValueType.INT8: "b",
@@ -176,7 +217,7 @@ class GGUFWriter:
if pack_fmt is not None:
self.kv_data += self._pack(pack_fmt, val, skip_pack_prefix = vtype == GGUFValueType.BOOL)
elif vtype == GGUFValueType.STRING:
- encoded_val = val.encode("utf8") if isinstance(val, str) else val
+ encoded_val = val.encode("utf-8") if isinstance(val, str) else val
self.kv_data += self._pack("Q", len(encoded_val))
self.kv_data += encoded_val
elif vtype == GGUFValueType.ARRAY and isinstance(val, Sequence) and val:
@@ -205,7 +246,7 @@ class GGUFWriter:
raise ValueError(f'Duplicated tensor name {name}')
self.ti_names.add(name)
- encoded_name = name.encode("utf8")
+ encoded_name = name.encode("utf-8")
self.ti_data += self._pack("Q", len(encoded_name))
self.ti_data += encoded_name
n_dims = len(tensor_shape)
@@ -237,7 +278,7 @@ class GGUFWriter:
self.ti_data_count += 1
def add_tensor(
- self, name: str, tensor: np.ndarray[Any, Any], raw_shape: Sequence[int] | None = None,
+ self, name: str, tensor: np.ndarray[Any, Any] | LazyTensor, raw_shape: Sequence[int] | None = None,
raw_dtype: GGMLQuantizationType | None = None,
) -> None:
if self.endianess == GGUFEndian.BIG:
@@ -262,7 +303,7 @@ class GGUFWriter:
if pad != 0:
fp.write(bytes([0] * pad))
- def write_tensor_data(self, tensor: np.ndarray[Any, Any]) -> None:
+ def write_tensor_data(self, tensor: np.ndarray[Any, Any] | LazyTensor) -> None:
if self.state is not WriterState.TI_DATA:
raise ValueError(f'Expected output file to contain tensor info, got {self.state}')
@@ -272,15 +313,33 @@ class GGUFWriter:
tensor.tofile(self.fout)
self.write_padding(self.fout, tensor.nbytes)
- def write_tensors_to_file(self) -> None:
+ def write_tensors_to_file(self, *, progress: bool = False) -> None:
self.write_ti_data_to_file()
self.write_padding(self.fout, self.fout.tell())
if self.temp_file is None:
+ self.tensors.reverse() # to pop from the "beginning" in constant time
+
+ if progress:
+ from tqdm import tqdm
+
+ total_bytes = sum(t.nbytes for t in self.tensors)
+
+ bar = tqdm(desc="Writing", total=total_bytes, unit="byte", unit_scale=True)
+
+ while True:
+ try:
+ tensor = self.tensors.pop()
+ except IndexError:
+ break
+ tensor.tofile(self.fout)
+ bar.update(tensor.nbytes)
+ self.write_padding(self.fout, tensor.nbytes)
+ return
while True:
try:
- tensor = self.tensors.pop(0)
+ tensor = self.tensors.pop()
except IndexError:
break
tensor.tofile(self.fout)
@@ -479,7 +538,7 @@ class GGUFWriter:
self.add_bool(Keys.Tokenizer.ADD_PREFIX, value)
def add_chat_template(self, value: str | Sequence[Mapping[str, str]]) -> None:
- if isinstance(value, list):
+ if not isinstance(value, str):
template_default = None
template_names = set()
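
A hedged sketch of how the new LazyTensor and the progress flag are meant to fit together; the output path, architecture string, and tensor name are placeholders, and use_temp_file=False keeps tensors queued so the new in-memory write path is exercised:

import numpy as np
from gguf.gguf_writer import GGUFWriter, LazyTensor

writer = GGUFWriter("out.gguf", "llama", use_temp_file=False)

# The callable is only evaluated inside tofile(); astype() is queued until then.
lazy = LazyTensor(lambda: np.ones((32, 64), dtype=np.float32),
                  dtype=np.float32, shape=(32, 64))
writer.add_tensor("output.weight", lazy.astype(np.float16))

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file(progress=True)   # tqdm bar, counts bytes written
writer.close()
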
diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py
index c97a78f3..3ba99be4 100644
--- a/gguf-py/gguf/vocab.py
+++ b/gguf-py/gguf/vocab.py
@@ -4,7 +4,7 @@ import logging
import json
import os
from pathlib import Path
-from typing import Any, Callable
+from typing import Any, Callable, Sequence, Mapping, Iterable
from .gguf_writer import GGUFWriter
@@ -15,11 +15,11 @@ class SpecialVocab:
merges: list[str]
add_special_token: dict[str, bool]
special_token_ids: dict[str, int]
- chat_template: str | None
+ chat_template: str | Sequence[Mapping[str, str]] | None
def __init__(
self, path: str | os.PathLike[str], load_merges: bool = False,
- special_token_types: tuple[str, ...] | None = None,
+ special_token_types: Iterable[str] | None = None,
n_vocab: int | None = None,
):
self.special_token_ids = {}
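
For context, SpecialVocab is normally built from a model directory and then flushed into a GGUFWriter; a hedged sketch with a placeholder directory (nothing here is new beyond the relaxed special_token_types and chat_template types):

from gguf.vocab import SpecialVocab

# Any iterable of token-type names is accepted now, not only a tuple.
sv = SpecialVocab("path/to/model_dir", load_merges=True,
                  special_token_types=["bos", "eos", "unk", "pad"])

# chat_template may be a plain string or, after this change, a list of
# named templates read from tokenizer_config.json.
print(type(sv.chat_template))

In the convert scripts this is typically followed by sv.add_to_gguf(writer).
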
diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml
index d1d876d6..36e63ee3 100644
--- a/gguf-py/pyproject.toml
+++ b/gguf-py/pyproject.toml
@@ -21,6 +21,7 @@ classifiers = [
[tool.poetry.dependencies]
python = ">=3.8"
numpy = ">=1.17"
+tqdm = ">=4.27"
[tool.poetry.dev-dependencies]
pytest = "^5.2"
diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py
index 2d3c3943..1a37a7b9 100755
--- a/gguf-py/scripts/gguf-dump.py
+++ b/gguf-py/scripts/gguf-dump.py
@@ -47,7 +47,7 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
if len(field.types) == 1:
curr_type = field.types[0]
if curr_type == GGUFValueType.STRING:
- log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60]))
+ log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
elif field.types[0] in reader.gguf_scalar_to_np:
log_message += ' = {0}'.format(field.parts[-1][0])
print(log_message) # noqa: NP100
diff --git a/gguf-py/scripts/gguf-new-metadata.py b/gguf-py/scripts/gguf-new-metadata.py
index 3444ab41..c8e3a83d 100644
--- a/gguf-py/scripts/gguf-new-metadata.py
+++ b/gguf-py/scripts/gguf-new-metadata.py
@@ -7,7 +7,7 @@ import json
from pathlib import Path
import numpy as np
-from typing import Any, Mapping, Sequence
+from typing import Any, Sequence
# Necessary to load the local gguf package
if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
@@ -34,7 +34,7 @@ def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
return host_endian
-def decode_field(field: gguf.ReaderField) -> Any:
+def decode_field(field: gguf.ReaderField | None) -> Any:
if field and field.types:
main_type = field.types[0]
@@ -42,11 +42,11 @@ def decode_field(field: gguf.ReaderField) -> Any:
sub_type = field.types[-1]
if sub_type == gguf.GGUFValueType.STRING:
- return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+ return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
else:
return [pv for idx in field.data for pv in field.parts[idx].tolist()]
if main_type == gguf.GGUFValueType.STRING:
- return str(bytes(field.parts[-1]), encoding='utf8')
+ return str(bytes(field.parts[-1]), encoding='utf-8')
else:
return field.parts[-1][0]
@@ -59,7 +59,7 @@ def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
return decode_field(field)
-def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str], remove_metadata: Sequence[str]) -> None:
+def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, str], remove_metadata: Sequence[str]) -> None:
for field in reader.fields.values():
# Suppress virtual fields and fields written by GGUFWriter
if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
@@ -101,7 +101,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
for tensor in reader.tensors:
# Dimensions are written in reverse order, so flip them first
- shape = np.flipud(tensor.shape)
+ shape = np.flipud(tensor.shape).tolist()
writer.add_tensor_info(tensor.name, shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)
writer.write_header_to_file()
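
The .tolist() call matters because np.flipud returns an ndarray of NumPy integer scalars, while GGUFWriter.add_tensor_info is annotated to take a Sequence[int]; a small standalone illustration of the difference:

import numpy as np

shape = (4096, 32000)                      # logical tensor shape
flipped = np.flipud(shape)                 # array([32000, 4096]) of NumPy ints
print(type(flipped[0]))                    # a NumPy integer scalar, not int
print(np.flipud(shape).tolist())           # [32000, 4096] as plain Python ints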