From a33e6a0d2a66104ea9a906bdbf8a94d050189d91 Mon Sep 17 00:00:00 2001 From: Kawrakow <48489457+ikawrakow@users.noreply.github.com> Date: Mon, 26 Feb 2024 18:28:38 +0200 Subject: Adding IQ2_S and IQ2_M to complete coverage of the 2-3 bit quantization range (#5721) * Adding IQ2_S and IQ2_M as a single cumulative commit * Update examples/quantize/quantize.cpp Co-authored-by: Georgi Gerganov --------- Co-authored-by: Iwan Kawrakow Co-authored-by: Georgi Gerganov --- tests/test-quantize-fns.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tests/test-quantize-fns.cpp') diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp index 04656bb9..f615b612 100644 --- a/tests/test-quantize-fns.cpp +++ b/tests/test-quantize-fns.cpp @@ -150,6 +150,7 @@ int main(int argc, char * argv[]) { const float total_error = total_quantization_error(qfns, test_size, test_data.data()); const float max_quantization_error = type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS : + type == GGML_TYPE_IQ2_S ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS : type == GGML_TYPE_Q3_K ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS : type == GGML_TYPE_IQ3_S ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS : type == GGML_TYPE_IQ3_XXS ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS : MAX_QUANTIZATION_TOTAL_ERROR; @@ -168,7 +169,8 @@ int main(int argc, char * argv[]) { const float vec_dot_error = dot_product_error(qfns, test_size, test_data.data(), test_data2.data()); const float max_allowed_error = type == GGML_TYPE_Q2_K || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ2_XXS || - type == GGML_TYPE_IQ3_XXS || type == GGML_TYPE_IQ3_S ? MAX_DOT_PRODUCT_ERROR_LOWBIT + type == GGML_TYPE_IQ3_XXS || type == GGML_TYPE_IQ3_S || type == GGML_TYPE_IQ2_S + ? MAX_DOT_PRODUCT_ERROR_LOWBIT : MAX_DOT_PRODUCT_ERROR; failed = !(vec_dot_error < max_allowed_error); num_failed += failed; -- cgit v1.2.3