From cbc83436197cde617cad696e665879c20df77daa Mon Sep 17 00:00:00 2001
From: Kawrakow <48489457+ikawrakow@users.noreply.github.com>
Date: Wed, 27 Mar 2024 08:44:27 +0100
Subject: Make IQ1_M work for QK_K = 64 (#6327)

* iq1_m: make it work for QK_K = 64 (WIP)

* iq1_m: make it work for QK_K = 64 (scalar and AVX2)

* iq1_m: QK_K = 64 seems to work on Metal and ARM_NEON

---------

Co-authored-by: Iwan Kawrakow
---
 ggml-common.h | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'ggml-common.h')

diff --git a/ggml-common.h b/ggml-common.h
index 517c9bb4..b2d67d5d 100644
--- a/ggml-common.h
+++ b/ggml-common.h
@@ -377,13 +377,20 @@ typedef struct {
 } block_iq1_s;
 static_assert(sizeof(block_iq1_s) == sizeof(ggml_half) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
 
-// 1.8125 bpw
+// 1.75 bpw
 typedef struct {
     uint8_t qs[QK_K/8]; // grid index, low 8 bits
     uint8_t qh[QK_K/16]; // grid index, high 3 bits + grid shift bit (for two groups of 8)
-    uint8_t scales[QK_K/32]; // 4-bit block scales
+#if QK_K == 64
+    ggml_half d;
+#endif
+    uint8_t scales[QK_K/32]; // 3-bit block scales (4-bit if QK_K == 64)
 } block_iq1_m;
+#if QK_K == 64
+static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32 + sizeof(ggml_half), "wrong iq1_m block size/padding");
+#else
 static_assert(sizeof(block_iq1_m) == QK_K/8 + QK_K/16 + QK_K/32, "wrong iq1_m block size/padding");
+#endif
 
 // Used by IQ1_M quants
 typedef union {
-- 
cgit v1.2.3