author    | compilade <git@compilade.net> | 2024-05-11 11:06:26 -0400
committer | GitHub <noreply@github.com>   | 2024-05-11 11:06:26 -0400
commit    | 5a419926b0c4efab0531401aea91522aaea9fd07 (patch)
tree      | fc04fa59a6588650a6fed70fedd8c1d4b39ec1d1 /gguf-py/gguf/constants.py
parent    | fae9d234b6606693704eca62fe4aefbb6c6abb45 (diff)
convert-hf : support bfloat16 conversion (#7158)
* convert-hf : support bfloat16 conversion
* gguf-py : flake8 fixes
* convert-hf : add missing space after comma
* convert-hf : get bit-exact same output as ./quantize
The quantization version was missing.
* convert-hf : don't round bf16 NANs
* convert-hf : save some memory with np.int16 intermediate bf16 weights
* convert-hf : more closely match llama.cpp with which weights to keep in f32
* convert-hf : add --outtype auto-f16
This option exists mainly for model quantizers who want an initial GGUF with the
highest fidelity to the original model while still using a 16-bit float type
instead of 32-bit floats (see the sketches after this list).
* convert-hf : remove a semicolon because flake8 doesn't like it
It's a reflex from when programming in C/C++, I guess.
* convert-hf : support outtype templating in outfile name
* convert-hf : rename --outtype auto-f16 to --outtype auto
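
The two bf16 items above ("don't round bf16 NANs" and the np.int16 intermediate) come down to repacking float32 data as bfloat16 bit patterns: round to nearest-even into the upper 16 bits, but leave NaNs un-rounded so they cannot turn into infinities, and keep the result in a 16-bit integer array so the intermediate is half the size of the float32 input. Below is a minimal numpy sketch of that idea; it is illustrative only (it returns uint16 where the commit uses int16, and the commit's conversion also handles details such as subnormals).

```python
import numpy as np

def fp32_to_bf16(x: np.ndarray) -> np.ndarray:
    """Repack float32 values as bfloat16 bit patterns stored in uint16."""
    u = x.astype(np.float32).view(np.uint32)
    # NaN: exponent all ones and a non-zero mantissa.
    is_nan = (u & 0x7fffffff) > 0x7f800000
    # Round to nearest, ties to even, into the upper 16 bits.
    rounded = (u + (0x7fff + ((u >> 16) & 1))) >> 16
    # NaNs are truncated instead of rounded so they stay NaN
    # rather than overflowing into an infinity.
    truncated = u >> 16
    # A 16-bit integer result halves the intermediate memory,
    # which is the saving the commit message mentions.
    return np.where(is_nan, truncated, rounded).astype(np.uint16)
```

The raw 16-bit patterns are all the GGUF writer needs; they can be stored as a BF16 tensor without ever materializing a bfloat16 numpy dtype.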
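
For the `--outtype auto` and outfile-templating items, the idea is: when no explicit type is requested, pick the highest-fidelity 16-bit type the source weights allow, keep 1-dimensional tensors (norms, biases) in float32 as llama.cpp does, and let the output filename carry a placeholder that is filled with the chosen type. A rough sketch of that flow, with hypothetical helper names and a hypothetical `{ftype}` placeholder (the real script's option handling differs in detail):

```python
import numpy as np

def choose_file_type(requested: str, source_dtype: str) -> str:
    # "--outtype auto": prefer bf16 when the source checkpoint is bfloat16,
    # otherwise fall back to f16 (illustrative heuristic, not the exact rule).
    if requested != "auto":
        return requested
    return "bf16" if source_dtype == "bfloat16" else "f16"

def tensor_output_type(data: np.ndarray, file_type: str) -> str:
    # 1-dimensional tensors (norm weights, biases) stay in float32,
    # mirroring which weights llama.cpp keeps unquantized.
    return "f32" if data.ndim == 1 else file_type

def fill_outfile_name(template: str, file_type: str) -> str:
    # Hypothetical "{ftype}" placeholder:
    # "model-{ftype}.gguf" -> "model-bf16.gguf".
    return template.format(ftype=file_type)

print(fill_outfile_name("model-{ftype}.gguf", choose_file_type("auto", "bfloat16")))
```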
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r-- | gguf-py/gguf/constants.py | 44 |
1 file changed, 44 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index a4fbfc5e..978fcada 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -10,6 +10,7 @@ from typing import Any
 GGUF_MAGIC = 0x46554747  # "GGUF"
 GGUF_VERSION = 3
 GGUF_DEFAULT_ALIGNMENT = 32
+GGML_QUANT_VERSION = 2  # GGML_QNT_VERSION from ggml.h
 
 #
 # metadata keys
@@ -838,6 +839,49 @@ class GGMLQuantizationType(IntEnum):
     BF16 = 30
 
 
+# TODO: add GGMLFileType from ggml_ftype in ggml.h
+
+
+# from llama_ftype in llama.h
+# ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
+class LlamaFileType(IntEnum):
+    ALL_F32 = 0
+    MOSTLY_F16 = 1  # except 1d tensors
+    MOSTLY_Q4_0 = 2  # except 1d tensors
+    MOSTLY_Q4_1 = 3  # except 1d tensors
+    MOSTLY_Q4_1_SOME_F16 = 4  # tok_embeddings.weight and output.weight are F16
+    # MOSTLY_Q4_2 = 5  # support has been removed
+    # MOSTLY_Q4_3 = 6  # support has been removed
+    MOSTLY_Q8_0 = 7  # except 1d tensors
+    MOSTLY_Q5_0 = 8  # except 1d tensors
+    MOSTLY_Q5_1 = 9  # except 1d tensors
+    MOSTLY_Q2_K = 10  # except 1d tensors
+    MOSTLY_Q3_K_S = 11  # except 1d tensors
+    MOSTLY_Q3_K_M = 12  # except 1d tensors
+    MOSTLY_Q3_K_L = 13  # except 1d tensors
+    MOSTLY_Q4_K_S = 14  # except 1d tensors
+    MOSTLY_Q4_K_M = 15  # except 1d tensors
+    MOSTLY_Q5_K_S = 16  # except 1d tensors
+    MOSTLY_Q5_K_M = 17  # except 1d tensors
+    MOSTLY_Q6_K = 18  # except 1d tensors
+    MOSTLY_IQ2_XXS = 19  # except 1d tensors
+    MOSTLY_IQ2_XS = 20  # except 1d tensors
+    MOSTLY_Q2_K_S = 21  # except 1d tensors
+    MOSTLY_IQ3_XS = 22  # except 1d tensors
+    MOSTLY_IQ3_XXS = 23  # except 1d tensors
+    MOSTLY_IQ1_S = 24  # except 1d tensors
+    MOSTLY_IQ4_NL = 25  # except 1d tensors
+    MOSTLY_IQ3_S = 26  # except 1d tensors
+    MOSTLY_IQ3_M = 27  # except 1d tensors
+    MOSTLY_IQ2_S = 28  # except 1d tensors
+    MOSTLY_IQ2_M = 29  # except 1d tensors
+    MOSTLY_IQ4_XS = 30  # except 1d tensors
+    MOSTLY_IQ1_M = 31  # except 1d tensors
+    MOSTLY_BF16 = 32  # except 1d tensors
+
+    GUESSED = 1024  # not specified in the model file
+
+
 class GGUFEndian(IntEnum):
     LITTLE = 0
     BIG = 1
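
The constants added here are what a converter stamps into the GGUF header so the result lines up with what ./quantize writes: `GGML_QUANT_VERSION` becomes `general.quantization_version` (the value whose absence caused the non-bit-exact output mentioned above), and a `LlamaFileType` value becomes `general.file_type`. A minimal sketch of that usage through gguf-py; the writer methods are gguf-py's public API, but treat the exact invocation as an assumption rather than the commit's code:

```python
import gguf

# Hypothetical output path; a real converter derives this from the CLI arguments.
writer = gguf.GGUFWriter("model-bf16.gguf", arch="llama")

# general.quantization_version: without this, the output differed from ./quantize.
writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
# general.file_type: one of the LlamaFileType values added in this diff.
writer.add_file_type(gguf.LlamaFileType.MOSTLY_BF16)

# A real converter adds the remaining metadata and the tensors here,
# then finalizes the file:
writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()
```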