diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2025-04-01 08:29:25 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-04-01 08:29:25 +0200 |
commit | 6e5156cab5c6d2858e1ecd5bc4dc5db81c71de39 (patch) | |
tree | 5e1e9df3afffd5dd34ac0371b4d43e802126bd36 /ggml/src | |
parent | 4819257ce66a680608cf9c7871156041d00eb7da (diff) |
Fix #300 (#301)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src')
-rw-r--r-- | ggml/src/iqk/iqk_mul_mat.cpp | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index cf512ba5..1c8a991d 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -15514,8 +15514,13 @@ struct HelperQ8KVR8 : public BaseHelper<step> { template <int D, int step> struct HelperQ40 final : public BaseHelper<step> { using Base = BaseHelper<step>; +#if defined __AVX2__ using block_q8 = block_q8_2; constexpr static int block_size_q = QK8_2; +#else + using block_q8 = block_q8_0; + constexpr static int block_size_q = QK8_0; +#endif HelperQ40(const char * data, int stride) : Base(data, stride) {} // Needed for v * softmax(k * q) @@ -15558,8 +15563,8 @@ struct HelperQ40 final : public BaseHelper<step> { template <int D, int step> struct HelperQ41 final : public BaseHelper<step> { using Base = BaseHelper<step>; - using block_q8 = block_q8_1; - constexpr static int block_size_q = QK8_1; + using block_q8 = block_q8_2; + constexpr static int block_size_q = QK8_2; HelperQ41(const char * data, int stride) : Base(data, stride) {} // Needed for v * softmax(k * q) @@ -16414,7 +16419,7 @@ struct FlashQKfp32 { #ifdef __aarch64__ MAKE_FUNCS(mul_mat_qX_0_q8_0<DequantizerQ40, nq); #else - MAKE_FUNCS(mul_mat_qX_1_q8_2_T<Q4_0_Unpacker, nq); + MAKE_FUNCS(mul_mat_qX_1_q8_2_T<Q4_0_1_Unpacker, nq); #endif } else if constexpr (std::is_same_v<KHelper, HelperQ41<D, k_step>>) { |