diff options
author | Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> | 2023-11-10 22:04:50 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-11 08:04:50 +0300 |
commit | 34b0a082074b073eb14c2bd93c0c070e20ddcd16 (patch) | |
tree | 6bbc77442dac1294ae1f74f48a722437cadf1cd0 /convert.py | |
parent | 4a4fd3eefad5bd17ab6bcd8e2181b4f62eae76cf (diff) |
gguf-py: Refactor and allow reading/modifying existing GGUF files (#3981)
* gguf-py: Refactor and add file reading support
* Replay changes from #3871
Credit to @cebtenzzre for that pull
* Various type annotation fixes.
* sort imports with isort (again)
* Fix missing return statement in add_tensor
* style cleanup with flake8
* fix NamedTuple and Enum usage
* Fix an issue with state init in GGUFReader
Move examples to an examples/ directory
Clean up examples
Add an example of modifying keys in a GGUF file
Update documentation with info on examples
Try to support people importing gguf/gguf.py directly
* Damagage is not a word.
* Clean up gguf-py/examples/modify_gguf.py whitespace
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
* Update gguf-py/examples/modify_gguf.py formatting
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
* Update gguf-py/gguf/gguf_reader.py type hint
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
* Make examples executable, formatting changes
* Add more information to GGUFReader and examples comments
* Include a gguf Python package version bump
* Add convert-gguf-endian.py script
* cleanup
* gguf-py : bump minor version
* Reorganize scripts
* Make GGUFReader endian detection less arbitrary
* Add JSON dumping support to gguf-dump.py
Which I kind of regret now
* A few for gguf-dump.py cleanups
* Murder accidental tuple in gguf-py/scripts/gguf-dump.py
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
* cleanup
* constants : remove unneeded type annotations
* fix python 3.8 compat
* Set up gguf- scripts in pyproject.toml
* And include scripts/__init__.py, derp
* convert.py: We can't currently support Q8_0 on big endian.
* gguf-py: SpecialVocab: Always try available sources for special token ids
gguf-py: SpecialVocab: Try to load merges from merges.txt if not in tokenizer.json
gguf-py: SpecialVocab: Add 'add_bos_token' type bools to GGUF metadata
u
* cleanup
* Promote add_X_token to GGUF metadata for BOS and EOS
---------
Co-authored-by: Jared Van Bortel <jared@nomic.ai>
Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
Diffstat (limited to 'convert.py')
-rwxr-xr-x | convert.py | 16 |
1 files changed, 9 insertions, 7 deletions
@@ -3,11 +3,9 @@ from __future__ import annotations import argparse import concurrent.futures -import copy import enum import faulthandler import functools -import io import itertools import json import math @@ -23,14 +21,14 @@ from abc import ABCMeta, abstractmethod from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor from dataclasses import dataclass from pathlib import Path -from typing import IO, TYPE_CHECKING, Any, Callable, Generator, Iterable, Literal, Sequence, TypeVar +from typing import IO, TYPE_CHECKING, Any, Callable, Iterable, Literal, TypeVar import numpy as np from sentencepiece import SentencePieceProcessor import os if 'NO_LOCAL_GGUF' not in os.environ: - sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf')) + sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) import gguf if TYPE_CHECKING: @@ -851,7 +849,7 @@ class OutputFile: elif isinstance(vocab, BpeVocab): self.gguf.add_tokenizer_model("gpt2") else: - raise ValueError(f'Unknown vocab type: Not BpeVocab or SentencePieceVocab') + raise ValueError('Unknown vocab type: Not BpeVocab or SentencePieceVocab') self.gguf.add_token_list(tokens) self.gguf.add_token_scores(scores) self.gguf.add_token_types(toktypes) @@ -905,7 +903,7 @@ class OutputFile: return dt.quantize(arr) @staticmethod - def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess=gguf.GGUFEndian.LITTLE) -> None: + def write_all(fname_out: Path, ftype: GGMLFileType, params: Params, model: LazyModel, vocab: Vocab, svocab: gguf.SpecialVocab, concurrency: int = DEFAULT_CONCURRENCY, endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE) -> None: check_vocab_size(params, vocab) of = OutputFile(fname_out, endianess=endianess) @@ -1114,11 +1112,15 @@ def do_dump_model(model_plus: ModelPlus) -> None: def main(args_in: list[str] | None = None) -> None: + output_choices = ["f32", "f16"] + if np.uint32(1) == np.uint32(1).newbyteorder("<"): + # We currently only support Q8_0 output on little endian systems. + output_choices.append("q8_0") parser = argparse.ArgumentParser(description="Convert a LLaMa model to a GGML compatible file") parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model") parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file") parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab") - parser.add_argument("--outtype", choices=["f32", "f16", "q8_0"], help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)") + parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)") parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file") parser.add_argument("--outfile", type=Path, help="path to write to; default: based on input") parser.add_argument("model", type=Path, help="directory containing model file, or model file itself (*.pth, *.pt, *.bin)") |