diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-17 11:10:56 +0200 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-22 12:02:51 +0300 |
commit | a35330eb5c0bed7f6e46ba973a7d2ec72eba3a62 (patch) | |
tree | acce5ec15d6622ba048c085c5402f20ffb73a233 | |
parent | d9fb92b7104b929f0427323f7964ef7a4da33d2b (diff) |
iq1_bn: very slightly better Metal dot product
-rw-r--r-- | ggml-metal.metal | 12 |
1 file changed, 6 insertions, 6 deletions
diff --git a/ggml-metal.metal b/ggml-metal.metal index 52a3133d..44055d2d 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -5068,8 +5068,6 @@ void kernel_mul_mv_iq1_bn_f32_impl( for (int row = 0; row < N_DST; row++) { - //uint8_t u = extra[0] & 0xff; - //scale.i = ((((u >> 4) | 0xf0) - 132) << 23) | ((u & 0x0f) << 19); uint8_t signs = extra[0] >> (8 + 4*ib); float4 acc = {0.f}; for (int j = 0; j < 2; ++j) { @@ -5090,10 +5088,12 @@ void kernel_mul_mv_iq1_bn_f32_impl( acc[2*j+1] += yl[16*j +12] * aux8[2] + yl[16*j +15] * aux8[3]; } - float sum = (signs & 1 ? sumy[0] - acc[0] : acc[0] - sumy[0]) - + (signs & 2 ? sumy[1] - acc[1] : acc[1] - sumy[1]) - + (signs & 4 ? sumy[2] - acc[2] : acc[2] - sumy[2]) - + (signs & 8 ? sumy[3] - acc[3] : acc[3] - sumy[3]); + acc -= sumy; + float sum = (signs & 1 ? -acc[0] : acc[0]) + + (signs & 2 ? -acc[1] : acc[1]) + + (signs & 4 ? -acc[2] : acc[2]) + + (signs & 8 ? -acc[3] : acc[3]); + sumf[row] += sum; extra += nb*sizeof(block_iq1_bn)/2; |