From 9386b499181a1d89c39e3a8114ef3255e9d52e63 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Mon, 10 Jun 2024 08:16:52 +0200
Subject: iqk_mul_mat: fp16 for Arm ~2% slower than tinyBLAS - not sure why.

---
 sgemm.cpp | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

(limited to 'sgemm.cpp')

diff --git a/sgemm.cpp b/sgemm.cpp
index 93a25521..a16752f0 100644
--- a/sgemm.cpp
+++ b/sgemm.cpp
@@ -866,10 +866,20 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
     if (Ctype != GGML_TYPE_F32)
         return false;
 
-    if (task == GGML_TASK_TYPE_COMPUTE && k >= 256 && Atype == GGML_TYPE_F16 && Btype == GGML_TYPE_F32) {
-        if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
-            return true;
+    if (task == GGML_TASK_TYPE_COMPUTE && k >= 256 && Atype == GGML_TYPE_F16) {
+#if defined __AVX2__ && defined __FMA__
+        if (Btype == GGML_TYPE_F32) {
+            if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
+                return true;
+            }
         }
+#elif defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && defined __ARM_FEATURE_FMA
+        if (Btype == GGML_TYPE_F16) {
+            if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
+                return true;
+            }
+        }
+#endif
     }
 
     switch (Atype) {
-- 
cgit v1.2.3