convert_hf_to_gguf.py : conversion from hf weights to Q6_0 (#483)

* Direct conversion from fp16 to Q6_0 * forgotten comma * More precise info
author: Nexes the Elder <124105151+Nexesenex@users.noreply.github.com> 2025-06-03 08:30:30 +0200
committer: GitHub <noreply@github.com> 2025-06-03 09:30:30 +0300
commit: 4f8b05a0d76e6c5e47fe1f6c7bd079e0fe95dbba (patch)
tree: d744e55678ff6ab4d743d71e53156d1e512cef41 /gguf-py
parent: 7a8abe29f745cff95896095bf19cf247bdf2c661 (diff)
2 files changed, 27 insertions, 1 deletions
diff --git a/gguf-py/gguf/quants.py b/gguf-py/gguf/quants.py
index ff589b85..21c5e490 100644
--- a/gguf-py/gguf/quants.py
+++ b/gguf-py/gguf/quants.py
@@ -377,6 +377,32 @@ class Q5_1(__Quant, qtype=GGMLQuantizationType.Q5_1):
         return (d * qs) + m
 
 
+class Q6_0(__Quant, qtype=GGMLQuantizationType.Q6_0):  # packs each block as: float16 scale, high bits (qh), low nibbles (qs)
+    @classmethod
+    def quantize_blocks(cls, blocks: np.ndarray) -> np.ndarray:  # blocks is indexed as (n_blocks, values-per-block); returns one uint8 row per block
+        n_blocks = blocks.shape[0]
+
+        imax = abs(blocks).argmax(axis=-1, keepdims=True)  # index of the largest-magnitude value in each block
+        max = np.take_along_axis(blocks, imax, axis=-1)  # that value with its sign kept; NOTE(review): shadows builtin `max`
+
+        d = max / -32  # per-block scale, chosen so the largest-magnitude value maps to -32 before the offset
+        with np.errstate(divide="ignore"):  # np.where evaluates 1 / d for every element, including d == 0
+            id = np.where(d == 0, 0, 1 / d)  # inverse scale, forced to 0 for all-zero blocks; NOTE(review): shadows builtin `id`
+        # Adapted from Q5_0: scale, add 32.5 and truncate, then clamp to the 6-bit range 0..63 (64 can occur when a value equals -max)
+        q = np.trunc((np.float64(blocks) * np.float64(id)) + np.float64(32.5), dtype=np.float32).astype(np.uint8).clip(0, 63)
+
+        qs = q.reshape((n_blocks, 2, cls.block_size // 2))  # split each block into its first and second half
+        qs = (qs[..., 0, :] & np.uint8(0x0F)) | (qs[..., 1, :] << np.uint8(4))  # low 4 bits of the first half in the low nibble, of the second half in the high nibble
+
+        qh = np.zeros((n_blocks, cls.block_size // 4), dtype=np.uint8)  # holds the upper 2 bits of every 6-bit code
+        for j in range(cls.block_size // 2):  # j walks the first half; j + block_size // 2 is its partner in the second half
+            h = ((q[:, j] >> 4) | ((q[:, j + cls.block_size // 2] >> 4) << 2)).astype(np.uint8)  # 4-bit group: top 2 bits of q[j] in bits 0-1, of its partner in bits 2-3
+            qh[:, j % (cls.block_size // 4)] |= (h << 4 * (j // (cls.block_size // 4)))  # shift is 0 for the first block_size // 4 values of j, 4 for the rest
+
+        d = d.astype(np.float16).view(np.uint8)  # scale stored as the raw bytes of a float16
+
+        return np.concatenate([d, qh, qs], axis=-1)  # byte order per block: scale, then qh, then qs
+
 class Q8_0(__Quant, qtype=GGMLQuantizationType.Q8_0):
     @classmethod
     # Implementation of Q8_0 with bit-exact same results as reference implementation in ggml-quants.c
diff --git a/gguf-py/tests/test_quants.py b/gguf-py/tests/test_quants.py
index 8b7a85c2..4353eef2 100755
--- a/gguf-py/tests/test_quants.py
+++ b/gguf-py/tests/test_quants.py
@@ -64,7 +64,7 @@ class GGMLQuants:
         self.libggml.ggml_quantize_requires_imatrix.argtypes = (ctypes.c_int,)
 
         for t in (
-            "q4_0", "q4_1", "q5_0", "q5_1", "q8_0",
+            "q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "q6_0",
             "q2_K", "q3_K", "q4_K", "q5_K", "q6_K",
             "iq2_xxs", "iq2_xs", "iq2_s", "iq3_xxs", "iq3_s", "iq1_s", "iq1_m",
             "iq4_nl", "iq4_xs",
author	Nexes the Elder <124105151+Nexesenex@users.noreply.github.com>	2025-06-03 08:30:30 +0200
committer	GitHub <noreply@github.com>	2025-06-03 09:30:30 +0300
commit	4f8b05a0d76e6c5e47fe1f6c7bd079e0fe95dbba (patch)
tree	d744e55678ff6ab4d743d71e53156d1e512cef41 /gguf-py
parent	7a8abe29f745cff95896095bf19cf247bdf2c661 (diff)