summary | refs | log | tree | commit | diff
path: root/ggml/src/ggml-quants.c
diff options
context:
space:
mode:
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-08-19 15:33:27 +0300
committer: GitHub <noreply@github.com> 2024-08-19 15:33:27 +0300
commit: a73702d93b1007b2f528432c3db20c7aa5206352 (patch)
tree: 9837a54a824e497a0bddefbb739a54a79df7441d /ggml/src/ggml-quants.c
parent: 5652100afcc423cf6342778cde372ca6aa54a79b (diff)
AVX2 quantization for Q8_K (#22)
It has been there for a while, but I forgot to add it here. Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src/ggml-quants.c')
-rw-r--r-- ggml/src/ggml-quants.c 5
1 file changed, 5 insertions, 0 deletions
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index 41362dee..981fb54b 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -12,6 +12,7 @@
#include "ggml-impl.h"
#if GGML_USE_IQK_MULMAT
#include "iqk/iqk_mul_mat.h"
+#include "iqk/iqk_quantize.h"
#endif
@@ -3770,7 +3771,11 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int6
}
void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) {
+#ifdef GGML_USE_IQK_MULMAT
+ iqk_quantize_row_q8_K(x, y, k);
+#else
quantize_row_q8_K_ref(x, y, k);
+#endif
}
//===================================== Dot products =================================