author     compilade <git@compilade.net>             2024-05-11 11:06:26 -0400
committer  GitHub <noreply@github.com>               2024-05-11 11:06:26 -0400
commit     5a419926b0c4efab0531401aea91522aaea9fd07
tree       fc04fa59a6588650a6fed70fedd8c1d4b39ec1d1   /gguf-py/gguf/constants.py
parent     fae9d234b6606693704eca62fe4aefbb6c6abb45
convert-hf : support bfloat16 conversion (#7158)
* convert-hf : support bfloat16 conversion
* gguf-py : flake8 fixes
* convert-hf : add missing space after comma
* convert-hf : get bit-exact same output as ./quantize
  The quantization version was missing.
* convert-hf : don't round bf16 NANs
* convert-hf : save some memory with np.int16 intermediate bf16 weights
* convert-hf : more closely match llama.cpp with which weights to keep in f32
* convert-hf : add --outtype auto-f16
  A reason for this to exist is for model quantizers who want an initial GGUF
  with the most fidelity to the original model while still using a 16-bit
  float type instead of 32-bit floats.
* convert-hf : remove a semicolon because flake8 doesn't like it
  It's a reflex from when programming in C/C++, I guess.
* convert-hf : support outtype templating in outfile name
* convert-hf : rename --outtype auto-f16 to --outtype auto
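For reference, the bf16 behaviour described in the message (round to nearest even, leave NaNs unrounded, keep intermediates as np.int16 to save memory) can be sketched with NumPy roughly as follows. This is a minimal, hypothetical sketch under those assumptions, not the converter's actual code; the function name and exact bit manipulations are illustrative only.

```python
import numpy as np

def fp32_to_bf16_bits(arr: np.ndarray) -> np.ndarray:
    # Hypothetical helper (not the converter's real function name):
    # returns bf16 bit patterns as int16 so each weight stays at 2 bytes.
    u = arr.astype(np.float32).view(np.uint32)
    # Detect NaNs from the raw bits: exponent all ones and non-zero mantissa.
    is_nan = (u & np.uint32(0x7FFFFFFF)) > np.uint32(0x7F800000)
    # NaNs are not rounded (rounding could carry into the exponent and turn a
    # NaN into +/-inf); only the quiet bit is forced so a NaN stays a NaN.
    quieted = u | np.uint32(0x00400000)
    # Everything else is rounded to nearest even: add a bias derived from the
    # lowest bit that will be kept (bit 16), then truncate to the top 16 bits.
    rounded = u + (np.uint32(0x7FFF) + ((u >> 16) & np.uint32(1)))
    bits32 = np.where(is_nan, quieted, rounded)
    return (bits32 >> 16).astype(np.int16)

# The NaN keeps a NaN encoding (upper bits 0x7FC0) instead of becoming inf.
sample = np.array([1.0, -2.5, float("nan")], dtype=np.float32)
print(fp32_to_bf16_bits(sample))
```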
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r--   gguf-py/gguf/constants.py   44
1 file changed, 44 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index a4fbfc5e..978fcada 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -10,6 +10,7 @@ from typing import Any
GGUF_MAGIC = 0x46554747 # "GGUF"
GGUF_VERSION = 3
GGUF_DEFAULT_ALIGNMENT = 32
+GGML_QUANT_VERSION = 2 # GGML_QNT_VERSION from ggml.h
#
# metadata keys
@@ -838,6 +839,49 @@ class GGMLQuantizationType(IntEnum):
BF16 = 30
+# TODO: add GGMLFileType from ggml_ftype in ggml.h
+
+
+# from llama_ftype in llama.h
+# ALL VALUES SHOULD BE THE SAME HERE AS THEY ARE OVER THERE.
+class LlamaFileType(IntEnum):
+ ALL_F32 = 0
+ MOSTLY_F16 = 1 # except 1d tensors
+ MOSTLY_Q4_0 = 2 # except 1d tensors
+ MOSTLY_Q4_1 = 3 # except 1d tensors
+ MOSTLY_Q4_1_SOME_F16 = 4 # tok_embeddings.weight and output.weight are F16
+ # MOSTLY_Q4_2 = 5 # support has been removed
+ # MOSTLY_Q4_3 = 6 # support has been removed
+ MOSTLY_Q8_0 = 7 # except 1d tensors
+ MOSTLY_Q5_0 = 8 # except 1d tensors
+ MOSTLY_Q5_1 = 9 # except 1d tensors
+ MOSTLY_Q2_K = 10 # except 1d tensors
+ MOSTLY_Q3_K_S = 11 # except 1d tensors
+ MOSTLY_Q3_K_M = 12 # except 1d tensors
+ MOSTLY_Q3_K_L = 13 # except 1d tensors
+ MOSTLY_Q4_K_S = 14 # except 1d tensors
+ MOSTLY_Q4_K_M = 15 # except 1d tensors
+ MOSTLY_Q5_K_S = 16 # except 1d tensors
+ MOSTLY_Q5_K_M = 17 # except 1d tensors
+ MOSTLY_Q6_K = 18 # except 1d tensors
+ MOSTLY_IQ2_XXS = 19 # except 1d tensors
+ MOSTLY_IQ2_XS = 20 # except 1d tensors
+ MOSTLY_Q2_K_S = 21 # except 1d tensors
+ MOSTLY_IQ3_XS = 22 # except 1d tensors
+ MOSTLY_IQ3_XXS = 23 # except 1d tensors
+ MOSTLY_IQ1_S = 24 # except 1d tensors
+ MOSTLY_IQ4_NL = 25 # except 1d tensors
+ MOSTLY_IQ3_S = 26 # except 1d tensors
+ MOSTLY_IQ3_M = 27 # except 1d tensors
+ MOSTLY_IQ2_S = 28 # except 1d tensors
+ MOSTLY_IQ2_M = 29 # except 1d tensors
+ MOSTLY_IQ4_XS = 30 # except 1d tensors
+ MOSTLY_IQ1_M = 31 # except 1d tensors
+ MOSTLY_BF16 = 32 # except 1d tensors
+
+ GUESSED = 1024 # not specified in the model file
+
+
class GGUFEndian(IntEnum):
LITTLE = 0
BIG = 1
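For context, a minimal sketch of how the newly added constants might be used when writing a GGUF file with gguf-py. It assumes GGUFWriter exposes add_file_type and add_quantization_version helpers that record the corresponding metadata keys; the file name and architecture are placeholders.

```python
from gguf import GGUFWriter
from gguf.constants import GGML_QUANT_VERSION, LlamaFileType

# Sketch only: record the overall file type and the quantization version so
# that downstream tools see the same metadata a ./quantize run would write.
writer = GGUFWriter("model-bf16.gguf", arch="llama")
writer.add_file_type(LlamaFileType.MOSTLY_BF16)
writer.add_quantization_version(GGML_QUANT_VERSION)
# ... metadata and tensors would be added and written here ...
writer.close()
```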