diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-08-05 08:13:53 +0300 |
---|---|---|
committer | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-08-05 07:18:18 +0200 |
commit | 6901b3bf14ee56b04a6fd50313fe775f871b2722 (patch) | |
tree | bb221eff9588d01566a2f75ab5f93e7b67a92f4f /ggml/src/iqk/iqk_quantize.cpp | |
parent | e830f4a5f7c99fb5e391a85b6bdcc825b61d1fdd (diff) |
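iq3_k, iq5_k: faster quantization
Use the same trick as iq4_k: the index lookup table yields the final index directly, except for entries offset by 8 (iq3_k) or 32 (iq5_k), which mark positions where the two neighbouring values still have to be compared.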
Diffstat (limited to 'ggml/src/iqk/iqk_quantize.cpp')
-rw-r--r-- | ggml/src/iqk/iqk_quantize.cpp | 45 |
1 files changed, 24 insertions, 21 deletions
```diff
diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp
index b6df288a..c840fabf 100644
--- a/ggml/src/iqk/iqk_quantize.cpp
+++ b/ggml/src/iqk/iqk_quantize.cpp
@@ -632,14 +632,17 @@ void vec_dot_iq2_k_q8_k(int n, float * GGML_RESTRICT s, size_t bs, const void *
 // ============================================== iq3_k
 //
 namespace {
-static int8_t iq3nl_index[69] = {
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
-    5, 5, 5, 5, 5
+const int8_t iq3nl_index[111] = {
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9,
+    9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 11, 4, 4, 4, 4,
+    4, 4, 4, 4, 4, 4, 12, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, 13, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+    6, 6, 6, 6, 14, 14, 7, 7, 7, 7, 7, 7, 7, 7, 7
 };
-static inline int best_index_iq3nl(const int8_t * values, float x) {
-    int index = x < values[1] ? 0 : x >= values[6] ? 6 : iq3nl_index[(int)x - values[1]];
-    return x - values[index] < values[index+1] - x ? index : index+1;
+inline int best_index_iq3nl(const int8_t * values, float x) {
+    int ix = (int)x - values[0];
+    if (ix < 0 || ix >= 111) return ix < 0 ? 0 : 7;
+    ix = iq3nl_index[ix];
+    return ix < 8 ? ix : x - values[ix-8] < values[ix-7] - x ? ix-8 : ix-7;
 }
 
 static void quantize_row_iq3_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {
@@ -1290,21 +1293,21 @@ void vec_dot_iq5_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx,
 }
 
 namespace {
-static int8_t iq5nl_index[248] = {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
-    6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10,
-    11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16,
-    16, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21,
-    21, 21, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25,
-    25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29,
-    29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30
+const int8_t iq5nl_index[248] = {
+    0, 0, 0, 0, 0, 0, 32, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 33, 33, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34, 34, 3, 3,
+    3, 3, 3, 3, 3, 3, 35, 35, 4, 4, 4, 4, 4, 4, 4, 36, 36, 5, 5, 5, 5, 5, 5, 5, 37, 37, 6, 6, 6, 6, 6, 6,
+    6, 38, 7, 7, 7, 7, 7, 7, 39, 39, 8, 8, 8, 8, 8, 40, 40, 9, 9, 9, 9, 9, 41, 41, 10, 10, 10, 10, 10, 42, 11, 11,
+    11, 11, 11, 43, 12, 12, 12, 12, 12, 44, 13, 13, 13, 13, 13, 45, 14, 14, 14, 14, 14, 46, 15, 15, 15, 15, 47, 47, 16, 16, 16, 16,
+    48, 17, 17, 17, 17, 17, 49, 18, 18, 18, 18, 18, 50, 19, 19, 19, 19, 19, 51, 20, 20, 20, 20, 20, 52, 21, 21, 21, 21, 21, 53, 53,
+    22, 22, 22, 22, 22, 54, 54, 23, 23, 23, 23, 23, 23, 55, 24, 24, 24, 24, 24, 24, 24, 56, 25, 25, 25, 25, 25, 25, 25, 57, 57, 26,
+    26, 26, 26, 26, 26, 26, 58, 58, 27, 27, 27, 27, 27, 27, 27, 27, 59, 28, 28, 28, 28, 28, 28, 28, 28, 28, 60, 29, 29, 29, 29, 29,
+    29, 29, 29, 29, 29, 61, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 62, 31, 31, 31, 31, 31, 31
 };
-static inline int best_index_iq5nl(const int8_t * values, float x) {
-    if (x <= values[ 0]) return 0;
-    if (x >= values[31]) return 31;
-    int index = iq5nl_index[(int)x - values[0]];
-    return x - values[index] < values[index+1] - x ? index : index+1;
+inline int best_index_iq5nl(const int8_t * values, float x) {
+    int ix = (int)x - values[0];
+    if (ix < 0 || ix >= 247) return ix < 0 ? 0 : 31;
+    ix = iq5nl_index[ix];
+    return ix < 32 ? ix : x - values[ix-32] < values[ix-31] - x ? ix-32 : ix-31;
 }
 
 void quantize_row_iq5_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {
```