From 3d92d7f802b332927669f01bfa51ebbb56e868ba Mon Sep 17 00:00:00 2001
From: Kawrakow <iwankawrakow@gmail.com>
Date: Thu, 15 May 2025 16:02:39 +0300
Subject: Adding IQ5_KS - 5.25 bpw quants (#422)

* iq5_ks: basics

* iq5_ks: quantize

* iq5_ks: CUDA dequantize works

* iq5_ks: dot product works on CUDA

* iq5_ks: MMQ works

* iq5_ks: Zen4

* iq5_ks: AVX2

But is is not quite right, just like iq4_k, iq5_k, iq6_k, iq4_ks.
All these need fixing on AVX2.

* iq5_ks: NEON

* iq5_ks: Metal dequantize

* iq5_ks: Metal dot product

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
---
 ggml/src/ggml-common.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'ggml/src/ggml-common.h')

diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
index 5a6417fc..1c2d1b17 100644
--- a/ggml/src/ggml-common.h
+++ b/ggml/src/ggml-common.h
@@ -687,6 +687,13 @@ typedef struct {
 } block_iq6_k;
 static_assert(sizeof(block_iq6_k) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/2 + QK_K/4 + QK_K/16, "wrong iq6_k block size/padding");
 
+typedef struct {
+    uint8_t  scales[QK_K/32];
+    uint8_t  qs[QK_K/2];
+    uint8_t  qh[QK_K/8];
+} block_iq5_ks;
+static_assert(sizeof(block_iq5_ks) == QK_K/32 + QK_K/2 + QK_K/8, "wrong iq5_ks block size/padding");
+
 
 #endif // GGML_COMMON_DECL
 #endif // GGML_COMMON_DECL
-- 
cgit v1.2.3