diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-07 14:23:32 +0300 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-22 12:02:50 +0300 |
commit | bc659e7de1054bc64a31da73feadb4865c887107 (patch) | |
tree | f7d1eec37803ff4dd1e93ee8ea366615032a1c77 /sgemm.cpp | |
parent | 8e072bbba3ffedd0940d97326b5a481b277c2f9e (diff) |
iqk_mul_mat: fp16 implementation for AVX2
This simple implementation beats jart's tinyBLAS by a
small margin (143 t/s vs 137 t/s for PP-512; TG is
4.75 t/s, i.e. exactly the same as ggml).
Diffstat (limited to 'sgemm.cpp')
-rw-r--r-- | sgemm.cpp | 7 |
1 file changed, 7 insertions, 0 deletions
@@ -51,6 +51,7 @@ #include "sgemm.h" #include "ggml-impl.h" #include "ggml-quants.h" +#include "iqk_mul_mat.h" #ifdef _MSC_VER #define NOINLINE __declspec(noinline) @@ -865,6 +866,12 @@ bool llamafile_sgemm(int64_t m, int64_t n, int64_t k, const void *A, int64_t lda if (Ctype != GGML_TYPE_F32) return false; + if (task == GGML_TASK_TYPE_COMPUTE && k >= 256 && Atype == GGML_TYPE_F16 && Btype == GGML_TYPE_F32) { + if (iqk_mul_mat(m, n, k, Atype, A, B, (float *)C, ldc, ith, nth)) { + return true; + } + } + switch (Atype) { case GGML_TYPE_F32: { |