From fc04994ebf8bfcb988a913cdd331bb120389bc44 Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Fri, 21 Jun 2024 18:30:01 +0200 Subject: iqk_mul_mat: add IQ4_NL also on NEON PPL seems somewhat higher? For llama-v2-7B we are still ~0.04 higher compared to what we expect after ~30 batches. --- iqk_mul_mat.cpp | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp index 2552444d..676b35f0 100644 --- a/iqk_mul_mat.cpp +++ b/iqk_mul_mat.cpp @@ -3894,6 +3894,35 @@ struct DequantizerQ40 final : public BaseLegacyDequantizer { //ggml_half aux[4]; }; +struct DequantizerIQ4NL final : public BaseLegacyDequantizer { + + DequantizerIQ4NL(const void * vx, size_t bx) : BaseLegacyDequantizer(vx, bx) {} + + inline void prepare1(int i, int8x16_t * q) const { + bits.prepare1(x[i].qs, q); + q[0] = vqtbl1q_s8(values, q[0]); + q[1] = vqtbl1q_s8(values, q[1]); + } + inline void prepare1(int i) { + prepare1(i, bits.b); + } + + inline float16x4_t new_block(int i) { + ggml_half aux[4]; + for (int k = 0; k < 4; ++k) { + aux[k] = x[4*i+k].d; + prepare1(4*i+k, bits.b + 2*k); + } + return vld1_f16((const float16_t *)aux); + } + static int8x16_t load_values() { + static const int8_t iq4nl_values[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113}; + return vld1q_s8(iq4nl_values); + } + + const int8x16_t values = load_values(); +}; + struct DequantizerQ41 : public BaseLegacyDequantizer { DequantizerQ41(const void * vx, size_t bx) : BaseLegacyDequantizer(vx, bx) {} @@ -4434,7 +4463,7 @@ static void mul_mat_iq2bn_q8_K64(int n, const void * vx, size_t bx, const DataIn template void MulMat::set_functions(MulMat& m) { if constexpr (std::is_same_v || std::is_same_v || - std::is_same_v) { + std::is_same_v || std::is_same_v) { m.funcs[0] = mul_mat_qX_0_q8_0; m.funcs[1] = mul_mat_qX_0_q8_0; m.funcs[2] = mul_mat_qX_0_q8_0; @@ -4557,6 +4586,10 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, 
MulMat& m, int /*Ny*/) { MulMat::set_functions(m); expected_Btype = GGML_TYPE_Q8_0; break; + case GGML_TYPE_IQ4_NL: + MulMat::set_functions(m); + expected_Btype = GGML_TYPE_Q8_0; + break; default: return false; } -- cgit v1.2.3