From 76be98fdec39fed876803d54e83cb4d6f2b50f90 Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Sat, 14 Sep 2024 19:47:26 +0300 Subject: Improve Q5_0 performance (#55) Co-authored-by: Iwan Kawrakow --- ggml/src/ggml.c | 4 ++++ ggml/src/iqk/iqk_mul_mat.cpp | 19 ++++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) (limited to 'ggml/src') diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 060d10c6..08b292b7 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -767,7 +767,11 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = { .from_float = quantize_row_q5_0, .from_float_ref = (ggml_from_float_t) quantize_row_q5_0_ref, .vec_dot = ggml_vec_dot_q5_0_q8_0, +#if GGML_USE_IQK_MULMAT && defined __AVX2__ + .vec_dot_type = GGML_TYPE_Q8_1, +#else .vec_dot_type = GGML_TYPE_Q8_0, +#endif .nrows = 1, }, [GGML_TYPE_Q5_1] = { diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index 8888534c..d5790475 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -3195,11 +3195,12 @@ struct Q5_0_Dequantizer { } }; +template struct Q5_1_Dequantizer { Dequantizer4bit b4; HBitDequantizer hbit; const __m256i mh = _mm256_set1_epi8(0x10); - inline __m256i dequant(const block_q5_1 * x) const { + inline __m256i dequant(const Q5 * x) const { const __m256i vqh = _mm256_and_si256(hbit.to_bytes(x->qh), mh); return _mm256_or_si256(b4.dequant(x->qs), vqh); } @@ -3293,12 +3294,17 @@ struct Q5_0_Unpacker final : public Q_Unpacker, Q5_1_Dequantizer> { + Q5_0_1_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} + using Sum4T = Sum4TypeQ81; + inline static int block_size() { return QK5_0; } +}; struct Q4_1_Unpacker final : public Q_Unpacker { Q4_1_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} using Sum4T = Sum4Type1; inline static int block_size() { return QK4_1; } }; -struct Q5_1_Unpacker final : public Q_Unpacker { +struct Q5_1_Unpacker final : public Q_Unpacker> { Q5_1_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} using Sum4T = Sum4Type1; inline static int block_size() { return QK4_1; } @@ -3598,7 +3604,8 @@ template void MulMat::set_functions(MulMat& m) { m.funcs[7] = mul_mat_qX_0_q8_0_T; } else if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v || std::is_same_v) { + std::is_same_v || std::is_same_v || + std::is_same_v) { m.funcs[0] = mul_mat_qX_1_q8_1_T; m.funcs[1] = mul_mat_qX_1_q8_1_T; m.funcs[2] = mul_mat_qX_1_q8_1_T; @@ -3875,8 +3882,10 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) { break; case GGML_TYPE_Q5_0: assert (ne00 % QK5_0 == 0); - MulMat::set_functions(mm); - expected_typeB = GGML_TYPE_Q8_0; + //MulMat::set_functions(mm); + //expected_typeB = GGML_TYPE_Q8_0; + MulMat::set_functions(mm); + expected_typeB = GGML_TYPE_Q8_1; break; case GGML_TYPE_Q5_1: assert (ne00 % QK5_1 == 0); -- cgit v1.2.3