diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-07 15:21:16 +0300 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-22 12:02:50 +0300 |
commit | 29164263f48790cb280948e34963a5e5a0e1da6a (patch) | |
tree | c112fd8d2259516232ae68dfe433a0745edd06c9 /iqk_mul_mat.cpp | |
parent | 36c3f57b0a44fd2b51ede5d765dc870c805b867f (diff) |
iqk_mul_mat: fp16 tweaks
Use 4x3 tiling on a real AVX2 CPU (with only 16 vector registers).
This works best for the Ryzen-5975WX.
Diffstat (limited to 'iqk_mul_mat.cpp')
-rw-r--r-- | iqk_mul_mat.cpp | 2 |
1 file changed, 2 insertions, 0 deletions
diff --git a/iqk_mul_mat.cpp b/iqk_mul_mat.cpp
index 905a7f9b..72147615 100644
--- a/iqk_mul_mat.cpp
+++ b/iqk_mul_mat.cpp
@@ -2434,7 +2434,9 @@ bool MulMat::set_mul_mat(int typeA, int ne00, MulMat& mm, int& row_size_q8, int
     mm.funcs[0] = mul_mat_f16_f32_T<1>;
     mm.funcs[1] = mul_mat_f16_f32_T<2>;
     mm.funcs[2] = mul_mat_f16_f32_T<3>;
+#ifdef __AVX512F__
     mm.funcs[3] = mul_mat_f16_f32_T<4>;
+#endif
     row_size_q8 = ggml_row_size(GGML_TYPE_F32, ne00);
     return true;
 }