From f7c5a94e756e4add4d531d295ae23493d9857508 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Tue, 15 Apr 2025 17:18:50 +0200 Subject: Better gemm/gemv on AVX2 for q4_0_r8 (#331) Co-authored-by: Iwan Kawrakow --- ggml/src/iqk/iqk_mul_mat.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'ggml/src') diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index 9a34270b..78270f5e 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -3620,8 +3620,7 @@ inline __m256i accum_q4_0_quants(const __m256i * v, const int8_t * qs) { _mm256_maddubs_epi16(v[5], _mm256_shuffle_epi32(yh, 0x55))); auto sumi4 = _mm256_add_epi16(_mm256_maddubs_epi16(v[6], _mm256_shuffle_epi32(yh, 0xaa)), _mm256_maddubs_epi16(v[7], _mm256_shuffle_epi32(yh, 0xff))); - auto sumi = _mm256_add_epi32(_mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_add_epi16(sumi1, sumi2)), - _mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_add_epi16(sumi3, sumi4))); + auto sumi = _mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_add_epi16(_mm256_add_epi16(sumi1, sumi2), _mm256_add_epi16(sumi3, sumi4))); #endif return sumi; } -- cgit v1.2.3