From 064b99365c4426b83b09a518b29cc1ffc0250d04 Mon Sep 17 00:00:00 2001
From: Kawrakow <48489457+ikawrakow@users.noreply.github.com>
Date: Sat, 14 Sep 2024 13:53:50 +0300
Subject: Improve Q4_0 and Q8_0 performance on AVX2/Zen4 (#54)

Co-authored-by: Iwan Kawrakow
---
 ggml/src/ggml.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'ggml/src/ggml.c')

diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 4fdf9c18..060d10c6 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -707,7 +707,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 .from_float = quantize_row_q4_0,
 .from_float_ref = (ggml_from_float_t) quantize_row_q4_0_ref,
 .vec_dot = ggml_vec_dot_q4_0_q8_0,
+#if GGML_USE_IQK_MULMAT && defined __AVX2__
+ .vec_dot_type = GGML_TYPE_Q8_1,
+#else
 .vec_dot_type = GGML_TYPE_Q8_0,
+#endif
 #if defined (__ARM_FEATURE_MATMUL_INT8)
 .nrows = 2,
 #else
@@ -788,7 +792,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
 .from_float_ref = (ggml_from_float_t) quantize_row_q8_0_ref,
 .from_float_to_mat = quantize_mat_q8_0,
 .vec_dot = ggml_vec_dot_q8_0_q8_0,
+#if GGML_USE_IQK_MULMAT && defined __AVX2__
+ .vec_dot_type = GGML_TYPE_Q8_1,
+#else
 .vec_dot_type = GGML_TYPE_Q8_0,
+#endif
 #if defined (__ARM_FEATURE_MATMUL_INT8)
 .nrows = 2,
 #else
--
cgit v1.2.3