summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kawrakow <iwankawrakow@gmail.com> 2025-02-20 13:55:13 +0200
committer: GitHub <noreply@github.com> 2025-02-20 13:55:13 +0200
commit: a45da7bfbf75503fe9e5a2f675db7825afdc6310 (patch)
tree: 9741966e94c7cee1c346a9ee7a72f0c5b88a166e
parent: 498a582919f3955fee9ba4239d5f7a298a42425d (diff)
Fix NEON gemm/gemv for legacy quants when row size is not divisible by 128 (#213)
* Fix gemm/gemv for legacy quants when row size is not divisible by 128
* Fix typo

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r-- ggml/src/iqk/iqk_mul_mat.cpp | 12
1 files changed, 5 insertions, 7 deletions
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 33e0a4a7..e8150ec5 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -11310,9 +11310,9 @@ inline void mul_mat_qX_Y_q8_Y(int n, Dequantizer& deq, Q8& q8, const DataInfo& i
q8.process_scales(i, deq, sc16, acc);
sum_4(i, deq, q8, sc16, acc);
}
- //for (int i = 4*(nb/4); i < nb; ++i) {
- // q8.process_1_block(i, deq, acc);
- //}
+ for (int i = 4*(nb/4); i < nb; ++i) {
+ q8.process_1_block(i, deq, acc);
+ }
for (int iy = 0; iy < Q8::nrc_y; ++iy) {
info.store(ix, iy, vaddvq_f32(acc[iy]));
@@ -11387,15 +11387,13 @@ static void mul_mat_qX_1_q8_1(int n, const void * vx, size_t bx, const DataInfo&
Dequantizer deq1(vx, bx), deq2(vx, bx);
mul_mat_qX_Y_q8_Y_1(n, deq1, deq2, q8, info, nrc_x);
} else {
- if (nrc_x%2 == 0) {
+ if (nrc_x%2 == 0 && n%128 == 0) {
Dequantizer deq1(vx, bx), deq2(vx, bx);
mul_mat_qX_Y_q8_Y_IK(n, deq1, deq2, q8, info, nrc_x);
} else {
Dequantizer deq(vx, bx);
mul_mat_qX_Y_q8_Y(n, deq, q8, info, nrc_x);
}
- //Dequantizer deq(vx, bx);
- //mul_mat_qX_Y_q8_Y(n, deq, q8, info, nrc_x);
}
}
@@ -11406,7 +11404,7 @@ static void mul_mat_qX_0_q8_0(int n, const void * vx, size_t bx, const DataInfo&
Dequantizer deq1(vx, bx), deq2(vx, bx);
mul_mat_qX_Y_q8_Y_1(n, deq1, deq2, q8, info, nrc_x);
} else {
- if (nrc_x%2 == 0) {
+ if (nrc_x%2 == 0 && n%128 == 0) {
Dequantizer deq1(vx, bx), deq2(vx, bx);
mul_mat_qX_Y_q8_Y_IK(n, deq1, deq2, q8, info, nrc_x);
} else {