ggml : support AVX512VNNI (#6280)

This change makes some quantization types (e.g. Q4_0, Q8_0) run faster on architectures that support AVX512-VNNI (e.g. AMD Zen 4).
author: Justine Tunney <jtunney@gmail.com> 2024-03-25 01:39:56 -0400
committer: GitHub <noreply@github.com> 2024-03-25 07:39:56 +0200
commit: 7733f0c76081b2a69b5f8b192db2db7c43629d58 (patch)
tree: 2a78e3e47fbd4d768d61f46d06c5c2815640595e
parent: a32b77c4b2c1808654d0b952f26c37d73d2e746b (diff)
1 file changed, 1 insertion, 1 deletion
diff --git a/ggml-quants.c b/ggml-quants.c
index 2eaca059..f26798ac 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -132,7 +132,7 @@ static inline __m256 sum_i16_pairs_float(const __m256i x) {
 }
 
 static inline __m256 mul_sum_us8_pairs_float(const __m256i ax, const __m256i sy) {
-#if __AVXVNNI__
+#if defined(__AVXVNNI__) || defined(__AVX512VNNI__)
     const __m256i zero = _mm256_setzero_si256();
     const __m256i summed_pairs = _mm256_dpbusd_epi32(zero, ax, sy);
     return _mm256_cvtepi32_ps(summed_pairs);
author	Justine Tunney <jtunney@gmail.com>	2024-03-25 01:39:56 -0400
committer	GitHub <noreply@github.com>	2024-03-25 07:39:56 +0200
commit	7733f0c76081b2a69b5f8b192db2db7c43629d58 (patch)
tree	2a78e3e47fbd4d768d61f46d06c5c2815640595e
parent	a32b77c4b2c1808654d0b952f26c37d73d2e746b (diff)