summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIwan Kawrakow <iwan.kawrakow@gmail.com>2024-08-05 08:13:53 +0300
committerKawrakow <48489457+ikawrakow@users.noreply.github.com>2024-08-05 07:18:18 +0200
commit6901b3bf14ee56b04a6fd50313fe775f871b2722 (patch)
treebb221eff9588d01566a2f75ab5f93e7b67a92f4f
parente830f4a5f7c99fb5e391a85b6bdcc825b61d1fdd (diff)
iq3_k, iq5_k: faster quantization
Just use the same trick as iq4_k
-rw-r--r--ggml/src/iqk/iqk_quantize.cpp45
1 files changed, 24 insertions, 21 deletions
diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp
index b6df288a..c840fabf 100644
--- a/ggml/src/iqk/iqk_quantize.cpp
+++ b/ggml/src/iqk/iqk_quantize.cpp
@@ -632,14 +632,17 @@ void vec_dot_iq2_k_q8_k(int n, float * GGML_RESTRICT s, size_t bs, const void *
// ============================================== iq3_k
//
namespace {
-static int8_t iq3nl_index[69] = {
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
- 5, 5, 5, 5, 5
+const int8_t iq3nl_index[111] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9,
+ 9, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 10, 10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 11, 11, 4, 4, 4, 4,
+ 4, 4, 4, 4, 4, 4, 12, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 13, 13, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+ 6, 6, 6, 6, 14, 14, 7, 7, 7, 7, 7, 7, 7, 7, 7
};
-static inline int best_index_iq3nl(const int8_t * values, float x) {
- int index = x < values[1] ? 0 : x >= values[6] ? 6 : iq3nl_index[(int)x - values[1]];
- return x - values[index] < values[index+1] - x ? index : index+1;
+inline int best_index_iq3nl(const int8_t * values, float x) {
+ int ix = (int)x - values[0];
+ if (ix < 0 || ix >= 111) return ix < 0 ? 0 : 7;
+ ix = iq3nl_index[ix];
+ return ix < 8 ? ix : x - values[ix-8] < values[ix-7] - x ? ix-8 : ix-7;
}
static void quantize_row_iq3_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {
@@ -1290,21 +1293,21 @@ void vec_dot_iq5_k_q8_k(int n, float * s, size_t bs, const void * vx, size_t bx,
}
namespace {
-static int8_t iq5nl_index[248] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6,
- 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10,
- 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 14, 15, 15, 15, 15, 15, 16, 16, 16,
- 16, 16, 16, 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, 21, 21, 21, 21, 21,
- 21, 21, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25,
- 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 29,
- 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30
+const int8_t iq5nl_index[248] = {
+ 0, 0, 0, 0, 0, 0, 32, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 33, 33, 2, 2, 2, 2, 2, 2, 2, 2, 2, 34, 34, 3, 3,
+ 3, 3, 3, 3, 3, 3, 35, 35, 4, 4, 4, 4, 4, 4, 4, 36, 36, 5, 5, 5, 5, 5, 5, 5, 37, 37, 6, 6, 6, 6, 6, 6,
+ 6, 38, 7, 7, 7, 7, 7, 7, 39, 39, 8, 8, 8, 8, 8, 40, 40, 9, 9, 9, 9, 9, 41, 41, 10, 10, 10, 10, 10, 42, 11, 11,
+ 11, 11, 11, 43, 12, 12, 12, 12, 12, 44, 13, 13, 13, 13, 13, 45, 14, 14, 14, 14, 14, 46, 15, 15, 15, 15, 47, 47, 16, 16, 16, 16,
+ 48, 17, 17, 17, 17, 17, 49, 18, 18, 18, 18, 18, 50, 19, 19, 19, 19, 19, 51, 20, 20, 20, 20, 20, 52, 21, 21, 21, 21, 21, 53, 53,
+ 22, 22, 22, 22, 22, 54, 54, 23, 23, 23, 23, 23, 23, 55, 24, 24, 24, 24, 24, 24, 24, 56, 25, 25, 25, 25, 25, 25, 25, 57, 57, 26,
+ 26, 26, 26, 26, 26, 26, 58, 58, 27, 27, 27, 27, 27, 27, 27, 27, 59, 28, 28, 28, 28, 28, 28, 28, 28, 28, 60, 29, 29, 29, 29, 29,
+ 29, 29, 29, 29, 29, 61, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 30, 62, 31, 31, 31, 31, 31, 31
};
-static inline int best_index_iq5nl(const int8_t * values, float x) {
- if (x <= values[ 0]) return 0;
- if (x >= values[31]) return 31;
- int index = iq5nl_index[(int)x - values[0]];
- return x - values[index] < values[index+1] - x ? index : index+1;
+inline int best_index_iq5nl(const int8_t * values, float x) {
+ int ix = (int)x - values[0];
+ if (ix < 0 || ix >= 247) return ix < 0 ? 0 : 31;
+ ix = iq5nl_index[ix];
+ return ix < 32 ? ix : x - values[ix-32] < values[ix-31] - x ? ix-32 : ix-31;
}
void quantize_row_iq5_k_impl(const float * x, void * vy, int n_per_row, const float * quant_weights) {