summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-17 16:50:11 +0300
committer: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-22 12:02:51 +0300
commit: d82e5db6e5e4985bc21506094a42493fde34f582 (patch)
tree: e77f4a192f9b40858275923235e76dfcd977ae1b
parent: ddea72453bf0e83e739865c3eb109c85831d3572 (diff)
iqk_mul_mat(bitnet): fix typo
With the last change (which added the typo), I'm now getting PP-512 = 300 t/s on the Ryzen-5975WX.
-rw-r--r-- iqk_mul_mat.cpp 2
1 file changed, 1 insertion, 1 deletion
diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp
index 923829ab..41c920de 100644
--- a/iqk_mul_mat.cpp
+++ b/iqk_mul_mat.cpp
@@ -1381,7 +1381,7 @@ IQK_NOINLINE void mul_mat_iq1bn_q8_K64(int n, const void * vx, size_t bx, const
#if defined __AVX512VNNI__ && defined __AVX512VL__
auto dot = _mm256_dpbusd_epi32(_mm256_dpbusd_epi32(_mm256_setzero_si256(), m1_8, dot1), m1_8, dot2);
#else
- auto dot = _mm256_madd_epi16(m1_16, _mm256_add_api16(_mm256_maddubs_epi16(m1_8, dot1), _mm256_maddubs_epi16(m1_8, dot2)));
+ auto dot = _mm256_madd_epi16(m1_16, _mm256_add_epi16(_mm256_maddubs_epi16(m1_8, dot1), _mm256_maddubs_epi16(m1_8, dot2)));
#endif
accd[0] = _mm256_fmadd_ps(_mm256_set1_ps(q8.scale(0, i)), _mm256_cvtepi32_ps(dot), accd[0]);
} else {