llama : check for 256 divisibility for IQ2_XS, IQ2_XXS (#4950)

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-01-15 10:09:38 +0200
committer: GitHub <noreply@github.com> 2024-01-15 10:09:38 +0200
commit: 2faaef39799c97a53bec3898141478700da25757 (patch)
tree: b910f12af44037480d5eb0f36817ce29e02ab562
parent: 4a3156de2fac9a8ee4279de7804d4e352dcfe121 (diff)
1 file changed, 4 insertions, 1 deletion
diff --git a/llama.cpp b/llama.cpp
index 7af38718..f9718060 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8559,7 +8559,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
     //}
     bool convert_incompatible_tensor = false;
     if (new_type == GGML_TYPE_Q2_K || new_type == GGML_TYPE_Q3_K || new_type == GGML_TYPE_Q4_K ||
-        new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K) {
+        new_type == GGML_TYPE_Q5_K || new_type == GGML_TYPE_Q6_K ||
+        new_type == GGML_TYPE_IQ2_XS || new_type == GGML_TYPE_IQ2_XXS) {
         int nx = tensor->ne[0];
         int ny = tensor->ne[1];
         if (nx % QK_K != 0) {
@@ -8571,6 +8572,8 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
     }
     if (convert_incompatible_tensor) {
         switch (new_type) {
+            case GGML_TYPE_IQ2_XXS:
+            case GGML_TYPE_IQ2_XS:
             case GGML_TYPE_Q2_K: new_type = GGML_TYPE_Q4_0; break;
             case GGML_TYPE_Q3_K: new_type = GGML_TYPE_Q4_1; break;
             case GGML_TYPE_Q4_K: new_type = GGML_TYPE_Q5_0; break;
author	Kawrakow <48489457+ikawrakow@users.noreply.github.com>	2024-01-15 10:09:38 +0200
committer	GitHub <noreply@github.com>	2024-01-15 10:09:38 +0200
commit	2faaef39799c97a53bec3898141478700da25757 (patch)
tree	b910f12af44037480d5eb0f36817ce29e02ab562
parent	4a3156de2fac9a8ee4279de7804d4e352dcfe121 (diff)