summaryrefslogtreecommitdiff
path: root/ggml/src/ggml.c
diff options
context:
space:
mode:
authorKawrakow <iwankawrakow@gmail.com>2025-03-27 05:49:16 +0100
committerGitHub <noreply@github.com>2025-03-27 05:49:16 +0100
commitd0b52076da0261f291b01f1ffa44884c8b2cdb1c (patch)
tree93abea8ae30140fbd6733af91eede57c2243e91d /ggml/src/ggml.c
parenta22250df93fd833a6cb7f310b159ad1b54e4d582 (diff)
Use bf16 instead of fp16 block scales for q8_1 (#292)
* WIP - not working * q8_0 without bells and whistles works * It works for q8_0 * Use bf16 instead of f16,int16 * q4_0_r8 * q5_0_r4 * q6_0_r4 * Also q4_1 and q5_1 * q8_0_r8 on avx2 --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src/ggml.c')
-rw-r--r--ggml/src/ggml.c39
1 files changed, 27 insertions, 12 deletions
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 036bd8a8..25694fc7 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -717,7 +717,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = ggml_vec_dot_q4_0_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -741,7 +741,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.from_float_ref = (ggml_from_float_t) quantize_row_q4_1_ref,
.vec_dot = ggml_vec_dot_q4_1_q8_1,
#if GGML_USE_IQK_MULMAT
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_1,
#endif
@@ -789,7 +789,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = ggml_vec_dot_q5_0_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -809,7 +809,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.from_float_ref = (ggml_from_float_t) quantize_row_q5_1_ref,
.vec_dot = ggml_vec_dot_q5_1_q8_1,
#if GGML_USE_IQK_MULMAT
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_1,
#endif
@@ -827,7 +827,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = ggml_vec_dot_q6_0_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -852,7 +852,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
// Remember: we cannot add 128 to the Q8 quants and use iblock sum in Q8_1 to subtract as we do on Zen4 for pure AVX2
// because there the result of the _mm256_maddubs_epi16() instruction may overflow the int16_t range
// (and it gets saturated if it does), leading to wrong results.
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -897,6 +897,16 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.nrows = 1,
.row_meta_size = 0,
},
+ [GGML_TYPE_Q8_2_X4] = {
+ .type_name = "q8_2_x4",
+ .blck_size = QK8_2,
+ .type_size = sizeof(block_q8_2),
+ .is_quantized = true,
+ .from_float = quantize_row_q8_2_x4,
+ .from_float_ref = quantize_row_q8_2_x4,
+ .nrows = 1,
+ .row_meta_size = 0,
+ },
[GGML_TYPE_Q2_K] = {
.type_name = "q2_K",
.blck_size = QK_K,
@@ -1272,7 +1282,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = ggml_vec_dot_iq4_nl_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -1628,7 +1638,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = vec_dot_iq4_nl_r4_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -1662,7 +1672,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = vec_dot_q4_0_r8_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -1683,7 +1693,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = vec_dot_q8_0_r8_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -1704,7 +1714,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = vec_dot_q5_0_r4_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -1725,7 +1735,7 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
.vec_dot = vec_dot_q6_0_r4_q8_0,
#if GGML_USE_IQK_MULMAT
#if defined __AVX2__
- .vec_dot_type = GGML_TYPE_Q8_1_X4,
+ .vec_dot_type = GGML_TYPE_Q8_2_X4,
#else
.vec_dot_type = GGML_TYPE_Q8_0_X4,
#endif
@@ -11647,6 +11657,7 @@ static void ggml_compute_forward_add1(
case GGML_TYPE_Q8_1:
case GGML_TYPE_Q8_0_X4:
case GGML_TYPE_Q8_1_X4:
+ case GGML_TYPE_Q8_2_X4:
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q2_K_R4:
case GGML_TYPE_Q3_K:
@@ -11815,6 +11826,7 @@ static void ggml_compute_forward_acc(
case GGML_TYPE_Q8_1:
case GGML_TYPE_Q8_0_X4:
case GGML_TYPE_Q8_1_X4:
+ case GGML_TYPE_Q8_2_X4:
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q2_K_R4:
case GGML_TYPE_Q3_K:
@@ -15690,6 +15702,7 @@ static void ggml_compute_forward_set(
case GGML_TYPE_Q8_1:
case GGML_TYPE_Q8_0_X4:
case GGML_TYPE_Q8_1_X4:
+ case GGML_TYPE_Q8_2_X4:
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q2_K_R4:
case GGML_TYPE_Q3_K:
@@ -15997,6 +16010,7 @@ static void ggml_compute_forward_get_rows(
case GGML_TYPE_Q8_1:
case GGML_TYPE_Q8_0_X4:
case GGML_TYPE_Q8_1_X4:
+ case GGML_TYPE_Q8_2_X4:
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q2_K_R4:
case GGML_TYPE_Q3_K:
@@ -16627,6 +16641,7 @@ static void ggml_compute_forward_clamp(
case GGML_TYPE_Q8_1:
case GGML_TYPE_Q8_0_X4:
case GGML_TYPE_Q8_1_X4:
+ case GGML_TYPE_Q8_2_X4:
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q2_K_R4:
case GGML_TYPE_Q3_K: