From 4f237d44f6d75afbb5cef39d4d6b0b35b2a517c7 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Date: Tue, 30 Jul 2024 16:11:25 +0300
Subject: iq3_k: Basics

Quantize/dequantize, CUDA dequantize.
PPL of LLaMA-3.1-8B is better than iq3_s and iq3_m.
---
 ggml/src/ggml-common.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

(limited to 'ggml/src/ggml-common.h')

diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
index 7da27794..423797b6 100644
--- a/ggml/src/ggml-common.h
+++ b/ggml/src/ggml-common.h
@@ -456,6 +456,16 @@ typedef struct {
 } block_iq2_k;
 static_assert(sizeof(block_iq2_k) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/32 + QK_K/4, "wrong iq2_k block size/padding");
 
+typedef struct { // iq3_k quantization block; member order and sizes are the binary layout checked by the static_assert below
+    ggml_half d; // NOTE(review): presumably the block-wide scale factor - confirm against the quantize/dequantize code
+    uint16_t extra; // NOTE(review): meaning not visible here; may carry per-group flags selecting a row of iq3nl_values - confirm
+    uint16_t scales_h; // NOTE(review): presumably the bits of the sub-block scales that do not fit in scales_l - confirm
+    uint8_t scales_l[QK_K/32]; // QK_K/32 bytes; NOTE(review): presumably packed low bits of the sub-block scales - confirm
+    uint8_t qs[QK_K/4]; // QK_K/4 bytes = 2*QK_K bits; NOTE(review): presumably the low 2 bits of each 3-bit index - confirm
+    uint8_t qh[QK_K/8]; // QK_K/8 bytes = QK_K bits; NOTE(review): presumably the high bit of each 3-bit index - confirm
+} block_iq3_k;
+static_assert(sizeof(block_iq3_k) == sizeof(ggml_half) + 2*sizeof(uint16_t) + QK_K/32 + QK_K/4 + QK_K/8, "wrong iq3_k block size/padding"); // compile-time check: struct size must equal the sum of its member sizes, i.e. no padding was inserted
+
 typedef struct {
     ggml_half d;
     uint16_t extra;
@@ -1911,6 +1921,11 @@ GGML_TABLE_BEGIN(int8_t, iq2nl_values, 8)
     -31, -13, 1, 17,   -26, -8, 6, 22
 GGML_TABLE_END()
 
+GGML_TABLE_BEGIN(int8_t, iq3nl_values, 16) // 16 int8_t values written as two rows of 8; NOTE(review): presumably the iq3_k dequantization levels - confirm
+    -63, -40, -23, -10, 1, 13, 28,  47, // entries 0..7
+    -59, -36, -19,  -6, 5, 17, 32,  51, // entries 8..15: each equals the entry directly above plus 4
+GGML_TABLE_END()
+
 GGML_TABLE_BEGIN(int8_t, iq4k_values, 32)
     -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113,
     -123, -100, -79, -61, -45, -31, -18,  -6, 5, 17, 29, 42, 57, 73, 93, 117
-- 
cgit v1.2.3