summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ronny Brendel <ronnybrendel@gmail.com> 2023-08-28 14:51:08 +0200
committer: GitHub <noreply@github.com> 2023-08-28 15:51:08 +0300
commit: 3af6b86301ddfb11bb68e91dfc030b611b0d8426 (patch)
tree: 9905b2efce267b21ea5194024fedc12adf3ef417
parent: 35feac6560387cf0484371af3d9b12bff678e0b9 (diff)
ggml : tiny ggml_vec_dot_q4_K_q8_K AVX2 improvement (#2819)
-rw-r--r-- k_quants.c 4
1 files changed, 2 insertions, 2 deletions
diff --git a/k_quants.c b/k_quants.c
index 82bf8169..3a9b1daf 100644
--- a/k_quants.c
+++ b/k_quants.c
@@ -2694,13 +2694,13 @@ void ggml_vec_dot_q4_K_q8_K(const int n, float * restrict s, const void * restri
const __m256i q8l = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
__m256i p16l = _mm256_maddubs_epi16(q4l, q8l);
p16l = _mm256_madd_epi16(scale_l, p16l);
- sumi = _mm256_add_epi32(sumi, p16l);
const __m256i q8h = _mm256_loadu_si256((const __m256i*)q8); q8 += 32;
__m256i p16h = _mm256_maddubs_epi16(q4h, q8h);
p16h = _mm256_madd_epi16(scale_h, p16h);
- sumi = _mm256_add_epi32(sumi, p16h);
+ const __m256i sumj = _mm256_add_epi32(p16l, p16h);
+ sumi = _mm256_add_epi32(sumi, sumj);
}
__m256 vd = _mm256_set1_ps(d);