From a73702d93b1007b2f528432c3db20c7aa5206352 Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Mon, 19 Aug 2024 15:33:27 +0300 Subject: AVX2 quantization for Q8_K (#22) The AVX2 implementation has existed for a while, but I forgot to add it here. Co-authored-by: Iwan Kawrakow --- ggml/src/ggml-quants.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'ggml/src/ggml-quants.c') diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index 41362dee..981fb54b 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -12,6 +12,7 @@ #include "ggml-impl.h" #if GGML_USE_IQK_MULMAT #include "iqk/iqk_mul_mat.h" +#include "iqk/iqk_quantize.h" #endif @@ -3770,7 +3771,11 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int6 } void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) { +#ifdef GGML_USE_IQK_MULMAT + iqk_quantize_row_q8_K(x, y, k); +#else quantize_row_q8_K_ref(x, y, k); +#endif } //===================================== Dot ptoducts ================================= -- cgit v1.2.3