diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2024-10-02 08:17:00 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-02 08:17:00 +0300 |
commit | d6909ed6f00f91f20c9ef628085a1a1a6a55c453 (patch) | |
tree | 4b71cfe72e4662385d1efee7af3a4eb825b35c8b | |
parent | 0999f77e5b1a97164ee0218f5fc118fe1649b0a3 (diff) |
iq4_nl: faster quantization (#76)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r-- | ggml/src/ggml-quants.c | 32 |
1 file changed, 19 insertions, 13 deletions
```diff
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index d6b1dc0a..bef2f73e 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -14347,15 +14347,21 @@ size_t quantize_iq1_m(const float * restrict src, void * restrict dst, int64_t n
 
 // ============================ 4-bit non-linear quants
 
-static inline int best_index_int8(int n, const int8_t * val, float x) {
-    if (x <= val[0]) return 0;
-    if (x >= val[n-1]) return n-1;
-    int ml = 0, mu = n-1;
-    while (mu-ml > 1) {
-        int mav = (ml+mu)/2;
-        if (x < val[mav]) mu = mav; else ml = mav;
-    }
-    return x - val[mu-1] < val[mu] - x ? mu-1 : mu;
+static const int8_t iq4nl_index[241] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    1, 17, 17, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 18, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+    3, 3, 3, 3, 3, 3, 19, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 20, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
+    5, 5, 21, 21, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 22, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 23, 23, 8, 8, 8, 8,
+    8, 8, 8, 8, 8, 8, 24, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 25, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 26, 26,
+    11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 27, 27, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 28, 13, 13, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 29, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+    14, 14, 14, 14, 30, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15
+};
+static inline int best_index_iq4nl(const int8_t * values, float x) {
+    int ix = (int)x - values[0];
+    if (ix < 0 || ix >= 241) return ix < 0 ? 0 : 15;
+    ix = iq4nl_index[ix];
+    return ix < 16 ? ix : x - values[ix-16] < values[ix-15] - x ? ix-16 : ix-15;
 }
 
 static void quantize_row_iq4_nl_impl(const int super_block_size, const int block_size, const float * restrict x,
@@ -14398,7 +14404,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
         float sumqx = 0, sumq2 = 0;
         for (int j = 0; j < block_size; ++j) {
             float al = id*xb[j];
-            int l = best_index_int8(16, values, al);
+            int l = best_index_iq4nl(values, al);
             Lb[j] = l;
             float q = values[l];
             float w = weight[j];
@@ -14412,7 +14418,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
             sumqx = sumq2 = 0;
             for (int j = 0; j < block_size; ++j) {
                 float al = id*xb[j];
-                int l = best_index_int8(16, values, al);
+                int l = best_index_iq4nl(values, al);
                 float q = values[l];
                 float w = weight[j];
                 sumqx += w*q*xb[j];
@@ -14443,7 +14449,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
             uint8_t * Lb = L + ib*block_size;
             const float * xb = x + ib*block_size;
             for (int j = 0; j < block_size; ++j) {
-                Lb[j] = best_index_int8(16, values, idl*xb[j]);
+                Lb[j] = best_index_iq4nl(values, idl*xb[j]);
             }
             l += 32;
             uint8_t l_l = l & 0xf;
@@ -14457,7 +14463,7 @@ static void quantize_row_iq4_nl_impl(const int super_block_size, const int block
         if (ntry > 0) {
             float id = scales[0] ? 1/scales[0] : 0;
             for (int j = 0; j < super_block_size; ++j) {
-                L[j] = best_index_int8(16, values, id*x[j]);
+                L[j] = best_index_iq4nl(values, id*x[j]);
             }
         }
     }
```