summary | refs | log | tree | commit | diff
path: root/iqk_mul_mat.cpp
diff options
context:
space:
mode:
author: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-07 15:21:16 +0300
committer: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-22 12:02:50 +0300
commit: 29164263f48790cb280948e34963a5e5a0e1da6a (patch)
tree: c112fd8d2259516232ae68dfe433a0745edd06c9 /iqk_mul_mat.cpp
parent: 36c3f57b0a44fd2b51ede5d765dc870c805b867f (diff)
iqk_mul_mat: fp16 tweaks
Use 4x3 tiling on a real AVX2 CPU (with only 16 vector registers). This works best for the Ryzen-5975WX.
Diffstat (limited to 'iqk_mul_mat.cpp')
-rw-r--r-- iqk_mul_mat.cpp 2
1 files changed, 2 insertions, 0 deletions
diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp
index 905a7f9b..72147615 100644
--- a/iqk_mul_mat.cpp
+++ b/iqk_mul_mat.cpp
@@ -2434,7 +2434,9 @@ bool MulMat::set_mul_mat(int typeA, int ne00, MulMat& mm, int& row_size_q8, int
mm.funcs[0] = mul_mat_f16_f32_T<1>;
mm.funcs[1] = mul_mat_f16_f32_T<2>;
mm.funcs[2] = mul_mat_f16_f32_T<3>;
+#ifdef __AVX512F__
mm.funcs[3] = mul_mat_f16_f32_T<4>;
+#endif
row_size_q8 = ggml_row_size(GGML_TYPE_F32, ne00);
return true;
}