summary | refs | log | tree | commit | diff
path: root/sgemm.cpp
diff options
context:
space:
mode:
author: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-10 08:16:52 +0200
committer: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-22 12:02:50 +0300
commit: 9386b499181a1d89c39e3a8114ef3255e9d52e63 (patch)
tree: 029577b0d13db63eb6a1b619d8298f7682cbd0db /sgemm.cpp
parent: 09d86e58768c14f1efe263433556944c2f39eac2 (diff)
iqk_mul_mat: fp16 for Arm
~2% slower than tinyBLAS - not sure why.
Diffstat (limited to 'sgemm.cpp')
-rw-r--r-- sgemm.cpp 16
1 files changed, 13 insertions, 3 deletions
diff --git a/sgemm.cpp b/sgemm.cpp
index 93a25521..a16752f0 100644
--- a/sgemm.cpp
+++ b/sgemm.cpp
@@ -866,10 +866,20 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda
if (Ctype != GGML_TYPE_F32)
return false;
- if (task == GGML_TASK_TYPE_COMPUTE && k >= 256 && Atype == GGML_TYPE_F16 && Btype == GGML_TYPE_F32) {
- if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
- return true;
+ if (task == GGML_TASK_TYPE_COMPUTE && k >= 256 && Atype == GGML_TYPE_F16) {
+#if defined __AVX2__ && defined __FMA__
+ if (Btype == GGML_TYPE_F32) {
+ if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
+ return true;
+ }
}
+#elif defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC && defined __ARM_FEATURE_FMA
+ if (Btype == GGML_TYPE_F16) {
+ if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) {
+ return true;
+ }
+ }
+#endif
}
switch (Atype) {