From caa42ccc560d2c76addcdef8edcd0a570410e9f8 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Fri, 21 Jun 2024 18:51:44 +0300 Subject: iqk_mul_mat: add IQ4_NL I never use it, so I had completely forgotten about it. --- iqk_mul_mat.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'iqk_mul_mat.cpp') diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp index f8ce62a9..2552444d 100644 --- a/iqk_mul_mat.cpp +++ b/iqk_mul_mat.cpp @@ -2330,6 +2330,19 @@ struct Q4_0_Dequantizer { } }; +struct IQ4_NL_Dequantizer { + Dequantizer4bit b4; + const __m256i values = load_values(); + inline __m256i dequant(const block_iq4_nl * x) const { + return _mm256_shuffle_epi8(values, b4.dequant(x->qs)); + } + static __m256i load_values() { + static const int8_t iq4nl_values[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113}; + auto aux = _mm_loadu_si128((const __m128i *)iq4nl_values); + return MM256_SET_M128I(aux, aux); + } +}; + struct Q4_1_Dequantizer { Dequantizer4bit b4; inline __m256i dequant(const block_q4_1 * x) const { @@ -2413,6 +2426,11 @@ struct Q4_0_Unpacker final : public Q_Unpacker { + IQ4_NL_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} + using Sum4T = Sum4TypeQ80; + inline static int block_size() { return QK4_NL; } +}; struct Q5_0_Unpacker final : public Q_Unpacker { Q5_0_Unpacker(const void * vx, size_t bx) : Q_Unpacker(vx, bx) {} using Sum4T = Sum4TypeQ80; @@ -2607,7 +2625,7 @@ void mul_mat_q80_q80_T(int n, const void * vx, size_t bx, const DataInfo& info, template void MulMat::set_functions(MulMat& m) { if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v) { + std::is_same_v || std::is_same_v) { m.funcs[0] = mul_mat_qX_0_q8_0_T; m.funcs[1] = mul_mat_qX_0_q8_0_T; m.funcs[2] = mul_mat_qX_0_q8_0_T; @@ -2808,6 +2826,11 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) { MulMat::set_functions(mm); expected_typeB = GGML_TYPE_Q8_0; break; + case GGML_TYPE_IQ4_NL: + assert (ne00 % QK4_NL == 0); + MulMat::set_functions(mm); + expected_typeB = GGML_TYPE_Q8_0; + break; default: return false; -- cgit v1.2.3