summary | refs | log | tree | commit | diff
path: root/tests
diff options
context:
space:
mode:
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-02-26 18:28:38 +0200
committer: GitHub <noreply@github.com> 2024-02-26 18:28:38 +0200
commit: a33e6a0d2a66104ea9a906bdbf8a94d050189d91 (patch)
tree: 30478b4a0b1792d1af66c5d64e2c3c4fa1af74ab /tests
parent: 47bb7b48c7cec9d8f57d56812ce811ec130b89a3 (diff)
Adding IQ2_S and IQ2_M to complete coverage of the 2-3 bit quantization range (#5721)
* Adding IQ2_S and IQ2_M as a single cumulative commit

* Update examples/quantize/quantize.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'tests')
-rw-r--r--  tests/test-backend-ops.cpp   2
-rw-r--r--  tests/test-quantize-fns.cpp  4
2 files changed, 4 insertions, 2 deletions
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 24d12ef1..60a85277 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -1916,7 +1916,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
GGML_TYPE_Q2_K, GGML_TYPE_Q3_K,
GGML_TYPE_Q4_K, GGML_TYPE_Q5_K,
GGML_TYPE_Q6_K,
- GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS,
+ GGML_TYPE_IQ2_XXS, GGML_TYPE_IQ2_XS, GGML_TYPE_IQ2_S,
GGML_TYPE_IQ3_XXS, GGML_TYPE_IQ1_S,
GGML_TYPE_IQ4_NL, GGML_TYPE_IQ3_S,
};
diff --git a/tests/test-quantize-fns.cpp b/tests/test-quantize-fns.cpp
index 04656bb9..f615b612 100644
--- a/tests/test-quantize-fns.cpp
+++ b/tests/test-quantize-fns.cpp
@@ -150,6 +150,7 @@ int main(int argc, char * argv[]) {
const float total_error = total_quantization_error(qfns, test_size, test_data.data());
const float max_quantization_error =
type == GGML_TYPE_Q2_K ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
+ type == GGML_TYPE_IQ2_S ? MAX_QUANTIZATION_TOTAL_ERROR_2BITS :
type == GGML_TYPE_Q3_K ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS :
type == GGML_TYPE_IQ3_S ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS :
type == GGML_TYPE_IQ3_XXS ? MAX_QUANTIZATION_TOTAL_ERROR_3BITS_XXS : MAX_QUANTIZATION_TOTAL_ERROR;
@@ -168,7 +169,8 @@ int main(int argc, char * argv[]) {
const float vec_dot_error = dot_product_error(qfns, test_size, test_data.data(), test_data2.data());
const float max_allowed_error = type == GGML_TYPE_Q2_K || type == GGML_TYPE_IQ2_XS || type == GGML_TYPE_IQ2_XXS ||
- type == GGML_TYPE_IQ3_XXS || type == GGML_TYPE_IQ3_S ? MAX_DOT_PRODUCT_ERROR_LOWBIT
+ type == GGML_TYPE_IQ3_XXS || type == GGML_TYPE_IQ3_S || type == GGML_TYPE_IQ2_S
+ ? MAX_DOT_PRODUCT_ERROR_LOWBIT
: MAX_DOT_PRODUCT_ERROR;
failed = !(vec_dot_error < max_allowed_error);
num_failed += failed;