summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- ggml/src/iqk/iqk_mul_mat.cpp 8
1 files changed, 2 insertions, 6 deletions
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index d1af9fe8..b1491d7f 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -9353,12 +9353,8 @@ void mul_mat_q8_k_r8_q8_k(int n, const void * vx, size_t bx, const DataInfo& inf
auto d4h = vcvt_f32_f16(vld1_f16((const float16_t *)iq8[ibl].d+4));
int32x4_t isum[2*nrc_y] = {};
for (int ib = 0; ib < QK_K/16; ++ib) {
- auto q1 = vld1q_u8_x4(iq8[ibl].qs + 128*ib + 0);
- auto q2 = vld1q_u8_x4(iq8[ibl].qs + 128*ib + 64);
- for (int k = 0; k < 4; ++k) {
- q1.val[k] = veorq_u8(q1.val[k], vdupq_n_u8(0x80));
- q2.val[k] = veorq_u8(q2.val[k], vdupq_n_u8(0x80));
- }
+ auto q1 = vld1q_s8_x4(iq8[ibl].qs + 128*ib + 0);
+ auto q2 = vld1q_s8_x4(iq8[ibl].qs + 128*ib + 64);
for (int iy = 0; iy < nrc_y; ++iy) {
auto y = vld1q_s8(q8.y[iy][ibl].qs+16*ib);
isum[2*iy+0] = vdotq_laneq_s32(isum[2*iy+0], q1.val[0], y, 0);