From d82e5db6e5e4985bc21506094a42493fde34f582 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Mon, 17 Jun 2024 16:50:11 +0300
Subject: iqk_mul_mat(bitnet): fix typo

With the last change (which added the typo), I'm now getting PP-512 = 300 t/s on the Ryzen-5975WX.
---
 iqk_mul_mat.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'iqk_mul_mat.cpp')

diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp
index 923829ab..41c920de 100644
--- a/iqk_mul_mat.cpp
+++ b/iqk_mul_mat.cpp
@@ -1381,7 +1381,7 @@ IQK_NOINLINE void mul_mat_iq1bn_q8_K64(int n, const void * vx, size_t bx, const
 #if defined __AVX512VNNI__ && defined __AVX512VL__
  auto dot = _mm256_dpbusd_epi32(_mm256_dpbusd_epi32(_mm256_setzero_si256(), m1_8, dot1), m1_8, dot2);
 #else
- auto dot = _mm256_madd_epi16(m1_16, _mm256_add_api16(_mm256_maddubs_epi16(m1_8, dot1), _mm256_maddubs_epi16(m1_8, dot2)));
+ auto dot = _mm256_madd_epi16(m1_16, _mm256_add_epi16(_mm256_maddubs_epi16(m1_8, dot1), _mm256_maddubs_epi16(m1_8, dot2)));
 #endif
  accd[0] = _mm256_fmadd_ps(_mm256_set1_ps(q8.scale(0, i)), _mm256_cvtepi32_ps(dot), accd[0]);
  } else {
-- 
cgit v1.2.3