From 8222c9f3d1e91096ab554f62ffbc384535b1963e Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Mon, 17 Jun 2024 13:04:24 +0200 Subject: iq1_bn(NEON): works now, but very slow Basically 2X slower than q4_0. --- iqk_mul_mat.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp index 9f4224cc..09189fa7 100644 --- a/iqk_mul_mat.cpp +++ b/iqk_mul_mat.cpp @@ -4088,7 +4088,7 @@ static void mul_mat_iq1bn_q8_K64(int n, const void * vx, size_t bx, const DataIn int32x4_t sumi = vdupq_n_s32(0); for (int j = 0; j < 4; ++j) { auto tmp = vmulq_s8(q.val[j], vreinterpretq_s8_u8(signs.val[j])); - tmp = vmulq_s8(q.val[j], v.val[j]); + tmp = vmulq_s8(tmp, v.val[j]); sumi = ggml_vdotq_s32(sumi, m1, tmp); } accd[iy] = vfmaq_f32(accd[iy], vdupq_n_f32(q8.scale(iy, i)), vcvtq_f32_s32(sumi)); -- cgit v1.2.3