summary | refs | log | tree | commit | diff
path: root/ggml/src
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src')
-rw-r--r-- ggml/src/iqk/iqk_mul_mat.cpp 5
1 files changed, 4 insertions, 1 deletions
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 3b58495e..e39c27d9 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -8082,6 +8082,9 @@ struct QFBase {
using Acc = __m512;
static inline Data load(const ggml_half * x) { return _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)x)); }
static inline Data load(const float * x) { return _mm512_loadu_ps(x); }
+ static inline Data load(const ggml_bf16_t * x) { // bf16 overload: reads 256 bits (16 x 16-bit values) with an unaligned load and widens them to 32-bit float lanes
+ return _mm512_castsi512_ps(_mm512_slli_epi32(_mm512_cvtepu16_epi32(_mm256_loadu_si256((const __m256i*)x)), 16)); // zero-extend each 16-bit value to 32 bits, shift it into the upper half of the lane, then reinterpret the bits as float (no numeric conversion)
+ }
static inline Acc acc(Acc prev, const Data& y, const Data& x) {
return _mm512_fmadd_ps(y, x, prev);
}
@@ -16079,7 +16082,7 @@ inline void iqk_flash_helper_T(KHelper& kh, ggml_type type_v,
HelperF16<Dv, k_step> vh(v, stride_v);
iqk_flash_helper<Dk, Dv, k_step>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv);
} break;
-#ifdef HAVE_FANCY_SIMD
+#ifdef __AVX512BF16__
case GGML_TYPE_BF16: {
HelperBF16<Dv, k_step> vh(v, stride_v);
iqk_flash_helper<Dk, Dv, k_step>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv);