diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2025-02-12 14:22:26 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-02-12 14:22:26 +0200 |
commit | 1bbb543478bbc0997c3f86581c4f95338a5eb5c3 (patch) | |
tree | 50216f1f623124d61a72bd8a8f09121964a11bf2 | |
parent | e974fc9e6691825611c781525ae16d77eed3cbc0 (diff) |
Fix iqk_mul_mat on AVX512 systems that are missing BF16 support (#204)
* Fix iqk_mul_mat on AVX512 systems that are missing BF16 support
* One more
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r-- | ggml/src/iqk/iqk_mul_mat.cpp | 5 |
1 file changed, 4 insertions, 1 deletion
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 3b58495e..e39c27d9 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -8082,6 +8082,9 @@ struct QFBase {
     using Acc = __m512;
     static inline Data load(const ggml_half * x) { return _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)x)); }
     static inline Data load(const float * x) { return _mm512_loadu_ps(x); }
+    static inline Data load(const ggml_bf16_t * x) {
+        return _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(_mm256_loadu_si256((const __m256i*)x)), 16));
+    }
     static inline Acc acc(Acc prev, const Data& y, const Data& x) {
         return _mm512_fmadd_ps(y, x, prev);
     }
@@ -16079,7 +16082,7 @@ inline void iqk_flash_helper_T(KHelper& kh, ggml_type type_v,
                 HelperF16<Dv, k_step> vh(v, stride_v);
                 iqk_flash_helper<Dk, Dv, k_step>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv);
             } break;
-#ifdef HAVE_FANCY_SIMD
+#ifdef __AVX512BF16__
         case GGML_TYPE_BF16: {
                 HelperBF16<Dv, k_step> vh(v, stride_v);
                 iqk_flash_helper<Dk, Dv, k_step>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv);