diff options
author | Ondřej Čertík <ondrej@certik.us> | 2024-03-14 04:40:14 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-14 12:40:14 +0200 |
commit | 3ca23481dd309bd51cc31c73a4cc34f922cc372f (patch) | |
tree | eab44b8528241eab625347f2a5791a3f64d682d0 /gguf-py/gguf/constants.py | |
parent | 3fe8d7a17f84bd721cd4d8db35365da44b69f68b (diff) |
gguf-py : add support for I8, I16 and I32 (#6045)
* Refactor dtype handling to be extensible
This code is equivalent to the previous version, but it is now structured so
that more NumPy dtypes can be added easily.
* Add support for I8, I16 and I32
These types are allowed in the GGUF specification.
* Add support for I8, I16 and I32 to gguf_writer
* Add support for I8, I16, I32 to gguf_reader
Diffstat (limited to 'gguf-py/gguf/constants.py')
-rw-r--r-- | gguf-py/gguf/constants.py | 6 |
1 file changed, 6 insertions, 0 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index b23badb1..99f71f0a 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -661,6 +661,9 @@ class GGMLQuantizationType(IntEnum):
     IQ3_S = 21
     IQ2_S = 22
     IQ4_XS = 23
+    I8 = 24
+    I16 = 25
+    I32 = 26
 
 
 class GGUFEndian(IntEnum):
@@ -727,6 +730,9 @@ GGML_QUANT_SIZES = {
     GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
     GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
     GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
+    GGMLQuantizationType.I8: (1, 1),
+    GGMLQuantizationType.I16: (1, 2),
+    GGMLQuantizationType.I32: (1, 4),
 }
 
 