summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kawrakow <iwankawrakow@gmail.com> 2025-04-15 17:18:50 +0200
committer GitHub <noreply@github.com> 2025-04-15 17:18:50 +0200
commit f7c5a94e756e4add4d531d295ae23493d9857508 (patch)
tree 1499eca65a7861fde231d0be3fe1c3f54da15639
parent 1bbb143eb36ce3f6b771fd4c0deb02826c0a7800 (diff)
Better gemm/gemv on AVX2 for q4_0_r8 (#331)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r-- ggml/src/iqk/iqk_mul_mat.cpp 3
1 file changed, 1 insertion, 2 deletions
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 9a34270b..78270f5e 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -3620,8 +3620,7 @@ inline __m256i accum_q4_0_quants(const __m256i * v, const int8_t * qs) {
_mm256_maddubs_epi16(v[5], _mm256_shuffle_epi32(yh, 0x55)));
auto sumi4 = _mm256_add_epi16(_mm256_maddubs_epi16(v[6], _mm256_shuffle_epi32(yh, 0xaa)),
_mm256_maddubs_epi16(v[7], _mm256_shuffle_epi32(yh, 0xff)));
- auto sumi = _mm256_add_epi32(_mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_add_epi16(sumi1, sumi2)),
- _mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_add_epi16(sumi3, sumi4)));
+ auto sumi = _mm256_madd_epi16(_mm256_set1_epi16(1), _mm256_add_epi16(_mm256_add_epi16(sumi1, sumi2), _mm256_add_epi16(sumi3, sumi4)));
#endif
return sumi;
}