diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-25 17:20:34 +0300 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-25 17:20:34 +0300 |
commit | 8b436a84c53de4c5a8eaf9be72cdd82324da2eeb (patch) | |
tree | d3126b6fe0f108403fa2445da9827ab3536469e3 | |
parent | c906c4c4fe29f310b8a42e56aae4a5377f6617f4 (diff) |
bitnet: simdify q8_K64 quantization on AVX
Doesn't make a real difference in performance.
-rw-r--r-- | iqk-quantize.cpp | 32 | ||||
-rw-r--r-- | iqk_mul_mat.cpp | 1 |
2 files changed, 32 insertions, 1 deletion
diff --git a/iqk-quantize.cpp b/iqk-quantize.cpp index 0248c563..f5840778 100644 --- a/iqk-quantize.cpp +++ b/iqk-quantize.cpp @@ -385,6 +385,38 @@ void quantize_row_q8_K64_reference(const float * x, block_q8_K64 * y, int64_t k) vst1q_s8(qs, qi); qs += 16; } +#elif defined __AVX__ + __m128 max[4] = {}; + __m128 sign_bit = _mm_set1_ps(-0.f); + for (int j = 0; j < k; j += 16) { + for (int i = 0; i < 4; ++i) { + auto val = _mm_loadu_ps(x + j + 4*i); + val = _mm_andnot_ps(sign_bit, val); + max[i] = _mm_max_ps(max[i], val); + } + } + __m128 vid[4]; + for (int i = 0; i < 4; ++i) { + max[i] = _mm_max_ps(max[i], _mm_movehl_ps(max[i], max[i])); + max[i] = _mm_max_ss(max[i], _mm_movehdup_ps(max[i])); + float maxi = _mm_cvtss_f32(max[i]); + dptr[i] = maxi/127; + float id = dptr[i] > 0 ? 1/dptr[i] : 0.f; + vid[i] = _mm_set1_ps(id); + } + __m128i q[4]; + for (int j = 0; j < k; j += 16) { + for (int i = 0; i < 4; ++i) { + auto val = _mm_loadu_ps(x + j + 4*i); + val = _mm_round_ps(_mm_mul_ps(vid[i], val), _MM_ROUND_NEAREST); + q[i] = _mm_cvtps_epi32(val); + } + auto q1 = _mm_packs_epi32(q[0], q[1]); + auto q2 = _mm_packs_epi32(q[2], q[3]); + auto qi = _mm_packs_epi16(q1, q2); + _mm_storeu_si128((__m128i *)qs, qi); + qs += 16; + } #else float aux[4] = {0.f, 0.f, 0.f, 0.f}; for (int j = 0; j < k; j += 16) { diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp index 666c2caf..d4354343 100644 --- a/iqk_mul_mat.cpp +++ b/iqk_mul_mat.cpp @@ -2788,7 +2788,6 @@ bool MulMat::prepare(int typeA, int typeB, int ne00, MulMat& mm, int Ny) { MulMat::set_functions<DequantizerIQ2XXS>(mm); break; case GGML_TYPE_IQ1_BN: - return false; assert (ne00 % QK_IQ1BN == 0); mm.funcs[0] = mul_mat_iq1bn_q8_K64<1>; mm.funcs[1] = mul_mat_iq1bn_q8_K64<2>; |