summary | refs | log | tree | commit | diff
path: root/gguf-py
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2024-05-23 10:00:21 +0300
committer: GitHub <noreply@github.com> 2024-05-23 10:00:21 +0300
commit: e84b71c2c6da6e69c8f815168ea836f9716a325e (patch)
tree: aa5c046cc5f0e9e953de5329412a753d82e60589 /gguf-py
parent: 1b1e27cb49158123ef4902aa41eb368c9e76e6a1 (diff)
ggml : drop support for QK_K=64 (#7473)
* ggml : drop support for QK_K=64
  ggml-ci
* opencl : restore QK_K=256 define
Diffstat (limited to 'gguf-py')
-rw-r--r-- gguf-py/gguf/constants.py | 3
1 file changed, 1 insertion, 2 deletions
diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 42df2e4d..67e23dcc 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -905,9 +905,8 @@ class GGUFValueType(IntEnum):
raise ValueError(f"Unknown type: {type(val)}")
-# Note: Does not support GGML_QKK_64
-QK_K = 256
# Items here are (block size, type size)
+QK_K = 256
GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
GGMLQuantizationType.F32: (1, 4),
GGMLQuantizationType.F16: (1, 2),