From 34b0a082074b073eb14c2bd93c0c070e20ddcd16 Mon Sep 17 00:00:00 2001
From: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
Date: Fri, 10 Nov 2023 22:04:50 -0700
Subject: gguf-py: Refactor and allow reading/modifying existing GGUF files
 (#3981)

* gguf-py: Refactor and add file reading support

* Replay changes from #3871

Credit to @cebtenzzre for that pull

* Various type annotation fixes.

* sort imports with isort (again)

* Fix missing return statement in add_tensor

* style cleanup with flake8

* fix NamedTuple and Enum usage

* Fix an issue with state init in GGUFReader

Move examples to an examples/ directory

Clean up examples

Add an example of modifying keys in a GGUF file

Update documentation with info on examples

Try to support people importing gguf/gguf.py directly

* Damagage is not a word.

* Clean up gguf-py/examples/modify_gguf.py whitespace

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* Update gguf-py/examples/modify_gguf.py formatting

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* Update gguf-py/gguf/gguf_reader.py type hint

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* Make examples executable, formatting changes

* Add more information to GGUFReader and examples comments

* Include a gguf Python package version bump

* Add convert-gguf-endian.py script

* cleanup

* gguf-py : bump minor version

* Reorganize scripts

* Make GGUFReader endian detection less arbitrary

* Add JSON dumping support to gguf-dump.py

Which I kind of regret now

* A few for gguf-dump.py cleanups

* Murder accidental tuple in gguf-py/scripts/gguf-dump.py

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* cleanup

* constants : remove unneeded type annotations

* fix python 3.8 compat

* Set up gguf- scripts in pyproject.toml

* And include scripts/__init__.py, derp

* convert.py: We can't currently support Q8_0 on big endian.

* gguf-py: SpecialVocab: Always try available sources for special token ids

gguf-py: SpecialVocab: Try to load merges from merges.txt if not in tokenizer.json

gguf-py: SpecialVocab: Add 'add_bos_token' type bools to GGUF metadata
u

* cleanup

* Promote add_X_token to GGUF metadata for BOS and EOS

---------

Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
---
 convert.py | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'convert.py')

diff --git a/convert.py b/convert.py
index b0f44dbe..a4b87e08 100755
--- a/convert.py
+++ b/convert.py
@@ -3,11 +3,9 @@ from __future__ import annotations
 
 import argparse
 import concurrent.futures
-import copy
 import enum
 import faulthandler
 import functools
-import io
 import itertools
 import json
 import math
@@ -23,14 +21,14 @@ from abc import ABCMeta, abstractmethod
 from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
 from dataclasses import dataclass
 from pathlib import Path
-from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar
+from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar
 
 import numpy as np
 from sentencepiece import SentencePieceProcessor
 
 import os
 if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
+    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
 if TYPE_CHECKING:
@@ -851,7 +849,7 @@ class OutputFile:
         elif isinstance(vocab, BpeVocab):
             self.gguf.add_tokenizer_model("gpt2")
         else:
-            raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab')
+            raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab')
         self.gguf.add_token_list(tokens)
         self.gguf.add_token_scores(scores)
         self.gguf.add_token_types(toktypes)
@@ -905,7 +903,7 @@ class OutputFile:
         return dt.quantize(arr)
 
     @staticmethod
-    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None:
+    def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None:
         check_vocab_size(params, vocab)
 
         of = OutputFile(fname_out, endianess=endianess)
@@ -1114,11 +1112,15 @@ def do_dump_model(model_plus: ModelPlus) -> None:
 
 
 def main(args_in: list[str] | None = None) -> None:
+    output_choices = ["f32", "f16"]
+    if np.uint32(1) == np.uint32(1).newbyteorder("<"):
+        # We currently only support Q8_0 output on little endian systems.
+        output_choices.append("q8_0")
     parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file")
     parser.add_argument("--dump",        action="store_true",    help="don't convert, just show what's in the model")
     parser.add_argument("--dump-single", action="store_true",    help="don't convert, just show what's in a single model file")
     parser.add_argument("--vocab-only",  action="store_true",    help="extract only the vocab")
-    parser.add_argument("--outtype",     choices=["f32", "f16", "q8_0"], help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
+    parser.add_argument("--outtype",     choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
     parser.add_argument("--vocab-dir",   type=Path,              help="directory containing tokenizer.model, if separate from model file")
     parser.add_argument("--outfile",     type=Path,              help="path to write to; default: based on input")
     parser.add_argument("model",         type=Path,              help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)")
-- 
cgit v1.2.3