diff options
author | compilade <git@compilade.net> | 2024-05-08 18:16:38 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-08 18:16:38 -0400 |
commit | f98eb31c517c95960df1d0abc48002787f145f3b (patch) | |
tree | de51a7b79fa5e6488ed4f76b5d0867d6c23d3c51 /gguf-py/scripts/gguf-new-metadata.py | |
parent | bc4bba364fb96d908f2698e908648df5e6f55e02 (diff) |
convert-hf : save memory with lazy evaluation (#7075)
* convert-hf : begin refactoring write_tensor
* convert : upgrade to sentencepiece v0.2.0
* convert-hf : remove unused n_dims in extra_*_tensors
* convert-hf : simplify MoE weights stacking
* convert-hf : flake8 linter doesn't like semicolons
* convert-hf : allow unusual model part names
For example, loading `model-00001-of-00001.safetensors` now works.
* convert-hf : fix stacking MoE expert tensors
`torch.stack` and `torch.cat` don't do the same thing.
* convert-hf : fix Mamba conversion
Tested to work even with a SentencePiece-based tokenizer.
* convert : use a string for the SentencePiece tokenizer path
* convert-hf : display tensor shape
* convert-hf : convert norms to f32 by default
* convert-hf : sort model part names
`os.listdir` is said to list files in arbitrary order.
Sorting the file names should let "model-00009-of-00042.safetensors"
be loaded before "model-00010-of-00042.safetensors".
* convert-hf : use an ABC for Model again
It seems Protocol can't be used as a statically type-checked ABC,
because its subclasses also can't be instantiated. (why did it seem to work?)
At least there's still a way to throw an error when forgetting to define
the `model_arch` property of any registered Model subclasses.
* convert-hf : use a plain class for Model, and forbid direct instantiation
There are no abstract methods used anyway,
so using ABC isn't really necessary.
* convert-hf : more consistent formatting of cmdline args
* convert-hf : align the message logged for converted tensors
* convert-hf : fix Refact conversion
* convert-hf : save memory with lazy evaluation
* convert-hf : flake8 doesn't like lowercase L as a variable name
* convert-hf : remove einops requirement for InternLM2
* convert-hf : faster model parts loading
Instead of pre-loading them all into a dict, iterate over the tensors
in the model parts progressively as needed in Model.write_tensors.
Conversion for some architectures relies on checking for the presence
of specific tensor names, so for multi-part models, the weight map is read
from the relevant json file to quickly get these names up-front.
* convert-hf : minor changes for consistency
* gguf-py : add tqdm as a dependency
It's small, and used for a progress bar
in GGUFWriter.write_tensors_to_file
Diffstat (limited to 'gguf-py/scripts/gguf-new-metadata.py')
-rw-r--r-- | gguf-py/scripts/gguf-new-metadata.py | 12 |
1 files changed, 6 insertions, 6 deletions
```diff
diff --git a/gguf-py/scripts/gguf-new-metadata.py b/gguf-py/scripts/gguf-new-metadata.py
index 3444ab41..c8e3a83d 100644
--- a/gguf-py/scripts/gguf-new-metadata.py
+++ b/gguf-py/scripts/gguf-new-metadata.py
@@ -7,7 +7,7 @@ import json
 from pathlib import Path
 
 import numpy as np
-from typing import Any, Mapping, Sequence
+from typing import Any, Sequence
 
 # Necessary to load the local gguf package
 if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
@@ -34,7 +34,7 @@ def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
     return host_endian
 
 
-def decode_field(field: gguf.ReaderField) -> Any:
+def decode_field(field: gguf.ReaderField | None) -> Any:
     if field and field.types:
         main_type = field.types[0]
 
@@ -42,11 +42,11 @@ def decode_field(field: gguf.ReaderField) -> Any:
             sub_type = field.types[-1]
 
             if sub_type == gguf.GGUFValueType.STRING:
-                return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+                return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
             else:
                 return [pv for idx in field.data for pv in field.parts[idx].tolist()]
         if main_type == gguf.GGUFValueType.STRING:
-            return str(bytes(field.parts[-1]), encoding='utf8')
+            return str(bytes(field.parts[-1]), encoding='utf-8')
         else:
             return field.parts[-1][0]
 
@@ -59,7 +59,7 @@ def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
     return decode_field(field)
 
 
-def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str], remove_metadata: Sequence[str]) -> None:
+def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, str], remove_metadata: Sequence[str]) -> None:
     for field in reader.fields.values():
         # Suppress virtual fields and fields written by GGUFWriter
         if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
@@ -101,7 +101,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
 
     for tensor in reader.tensors:
         # Dimensions are written in reverse order, so flip them first
-        shape = np.flipud(tensor.shape)
+        shape = np.flipud(tensor.shape).tolist()
         writer.add_tensor_info(tensor.name, shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)
 
     writer.write_header_to_file()
```