diff options
author | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-08-19 15:33:27 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-08-19 15:33:27 +0300 |
commit | a73702d93b1007b2f528432c3db20c7aa5206352 (patch) | |
tree | 9837a54a824e497a0bddefbb739a54a79df7441d /ggml/src/ggml-quants.c | |
parent | 5652100afcc423cf6342778cde372ca6aa54a79b (diff) |
AVX2 quantization for Q8_K (#22)
It has been there for a while, but I forgot to add it here.
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src/ggml-quants.c')
-rw-r--r-- | ggml/src/ggml-quants.c | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index 41362dee..981fb54b 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -12,6 +12,7 @@ #include "ggml-impl.h" #if GGML_USE_IQK_MULMAT #include "iqk/iqk_mul_mat.h" +#include "iqk/iqk_quantize.h" #endif @@ -3770,7 +3771,11 @@ void dequantize_row_q8_K(const block_q8_K * restrict x, float * restrict y, int6 } void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) { +#ifdef GGML_USE_IQK_MULMAT + iqk_quantize_row_q8_K(x, y, k); +#else quantize_row_q8_K_ref(x, y, k); +#endif } //===================================== Dot ptoducts ================================= |