From f98eb31c517c95960df1d0abc48002787f145f3b Mon Sep 17 00:00:00 2001
From: compilade <git@compilade.net>
Date: Wed, 8 May 2024 18:16:38 -0400
Subject: convert-hf : save memory with lazy evaluation (#7075)

* convert-hf : begin refactoring write_tensor

* convert : upgrade to sentencepiece v0.2.0

* convert-hf : remove unused n_dims in extra_*_tensors

* convert-hf : simplify MoE weights stacking

* convert-hf : flake8 linter doesn't like semicolons

* convert-hf : allow unusual model part names

For example, loading `model-00001-of-00001.safetensors` now works.

* convert-hf : fix stacking MoE expert tensors

`torch.stack` and `torch.cat` don't do the same thing.

* convert-hf : fix Mamba conversion

Tested to work even with a SentencePiece-based tokenizer.

* convert : use a string for the SentencePiece tokenizer path

* convert-hf : display tensor shape

* convert-hf : convert norms to f32 by default

* convert-hf : sort model part names

`os.listdir` is said to list files in arbitrary order.
Sorting the file names should let "model-00009-of-00042.safetensors"
be loaded before "model-00010-of-00042.safetensors".

* convert-hf : use an ABC for Model again

It seems Protocol can't be used as a statically type-checked ABC,
because its subclasses also can't be instantiated. (why did it seem to work?)

At least there's still a way to throw an error when forgetting to define
the `model_arch` property of any registered Model subclasses.

* convert-hf : use a plain class for Model, and forbid direct instantiation

There are no abstract methods used anyway,
so using ABC isn't really necessary.

* convert-hf : more consistent formatting of cmdline args

* convert-hf : align the message logged for converted tensors

* convert-hf : fix Refact conversion

* convert-hf : save memory with lazy evaluation

* convert-hf : flake8 doesn't like lowercase L as a variable name

* convert-hf : remove einops requirement for InternLM2

* convert-hf : faster model parts loading

Instead of pre-loading them all into a dict, iterate on the tensors
in the model parts progressively as needed in Model.write_tensors

Conversion for some architectures relies on checking for the presence
of specific tensor names, so for multi-part models, the weight map is read
from the relevant json file to quickly get these names up-front.

* convert-hf : minor changes for consistency

* gguf-py : add tqdm as a dependency

It's small, and used for a progress bar
in GGUFWriter.write_tensors_to_file
---
 gguf-py/scripts/gguf-dump.py         |  2 +-
 gguf-py/scripts/gguf-new-metadata.py | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'gguf-py/scripts')

diff --git a/gguf-py/scripts/gguf-dump.py b/gguf-py/scripts/gguf-dump.py
index 2d3c3943..1a37a7b9 100755
--- a/gguf-py/scripts/gguf-dump.py
+++ b/gguf-py/scripts/gguf-dump.py
@@ -47,7 +47,7 @@ def dump_metadata(reader: GGUFReader, args: argparse.Namespace) -> None:
         if len(field.types) == 1:
             curr_type = field.types[0]
             if curr_type == GGUFValueType.STRING:
-                log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf8')[:60]))
+                log_message += ' = {0}'.format(repr(str(bytes(field.parts[-1]), encoding='utf-8')[:60]))
             elif field.types[0] in reader.gguf_scalar_to_np:
                 log_message += ' = {0}'.format(field.parts[-1][0])
         print(log_message)  # noqa: NP100
diff --git a/gguf-py/scripts/gguf-new-metadata.py b/gguf-py/scripts/gguf-new-metadata.py
index 3444ab41..c8e3a83d 100644
--- a/gguf-py/scripts/gguf-new-metadata.py
+++ b/gguf-py/scripts/gguf-new-metadata.py
@@ -7,7 +7,7 @@ import json
 from pathlib import Path
 
 import numpy as np
-from typing import Any, Mapping, Sequence
+from typing import Any, Sequence
 
 # Necessary to load the local gguf package
 if "NO_LOCAL_GGUF" not in os.environ and (Path(__file__).parent.parent.parent / 'gguf-py').exists():
@@ -34,7 +34,7 @@ def get_byteorder(reader: gguf.GGUFReader) -> gguf.GGUFEndian:
         return host_endian
 
 
-def decode_field(field: gguf.ReaderField) -> Any:
+def decode_field(field: gguf.ReaderField | None) -> Any:
     if field and field.types:
         main_type = field.types[0]
 
@@ -42,11 +42,11 @@ def decode_field(field: gguf.ReaderField) -> Any:
             sub_type = field.types[-1]
 
             if sub_type == gguf.GGUFValueType.STRING:
-                return [str(bytes(field.parts[idx]), encoding='utf8') for idx in field.data]
+                return [str(bytes(field.parts[idx]), encoding='utf-8') for idx in field.data]
             else:
                 return [pv for idx in field.data for pv in field.parts[idx].tolist()]
         if main_type == gguf.GGUFValueType.STRING:
-            return str(bytes(field.parts[-1]), encoding='utf8')
+            return str(bytes(field.parts[-1]), encoding='utf-8')
         else:
             return field.parts[-1][0]
 
@@ -59,7 +59,7 @@ def get_field_data(reader: gguf.GGUFReader, key: str) -> Any:
     return decode_field(field)
 
 
-def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: Mapping[str, str], remove_metadata: Sequence[str]) -> None:
+def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new_metadata: dict[str, str], remove_metadata: Sequence[str]) -> None:
     for field in reader.fields.values():
         # Suppress virtual fields and fields written by GGUFWriter
         if field.name == gguf.Keys.General.ARCHITECTURE or field.name.startswith('GGUF.'):
@@ -101,7 +101,7 @@ def copy_with_new_metadata(reader: gguf.GGUFReader, writer: gguf.GGUFWriter, new
 
     for tensor in reader.tensors:
         # Dimensions are written in reverse order, so flip them first
-        shape = np.flipud(tensor.shape)
+        shape = np.flipud(tensor.shape).tolist()
         writer.add_tensor_info(tensor.name, shape, tensor.data.dtype, tensor.data.nbytes, tensor.tensor_type)
 
     writer.write_header_to_file()
-- 
cgit v1.2.3