author     Iwan Kawrakow <iwan.kawrakow@gmail.com>                    2024-08-05 11:59:36 +0300
committer  Kawrakow <48489457+ikawrakow@users.noreply.github.com>    2024-08-05 11:39:10 +0200
commit     b409c153636d27473970abd3a9c9400b6287d400
tree       7bdba4859b8a66fa39ec237b87db56399edacebb
parent     c11c7c8cae5ab1abf41c16b7bb27439bb0983c54
q2_K: allow it to detect ternary nets and quantize accordingly
Diffstat (limited to 'src/llama.cpp')
-rw-r--r--  src/llama.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index 2caaf7d0..e530f528 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16071,12 +16071,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
}
}
}
- if ((new_type == GGML_TYPE_IQ2_XXS ||
+ if (!params->ignore_imatrix_rules && !imatrix &&
+ (new_type == GGML_TYPE_IQ2_XXS ||
new_type == GGML_TYPE_IQ2_XS ||
new_type == GGML_TYPE_IQ2_S ||
new_type == GGML_TYPE_IQ1_S ||
(new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) ||
- (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
+ (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) {
LLAMA_LOG_ERROR("\n\n============================================================\n");
LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
@@ -16441,6 +16442,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
/*.only_copy =*/ false,
/*.pure =*/ false,
/*.keep_split =*/ false,
+ /*.ignore_imatrix_rules =*/ false,
/*.imatrix =*/ nullptr,
/*.kv_overrides =*/ nullptr,
};
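
For reference, a minimal sketch (not part of this commit) of how a caller might exercise the new flag through the public C quantization API. The GGUF file names are placeholders, and the sketch assumes this fork's llama.h, where ignore_imatrix_rules is the field the hunk above adds; everything else is the stock llama_model_quantize interface.

#include <stdio.h>
#include "llama.h"

int main(void) {
    // Start from the library defaults; this commit extends them with
    // ignore_imatrix_rules = false.
    struct llama_model_quantize_params params = llama_model_quantize_default_params();

    params.ftype   = LLAMA_FTYPE_MOSTLY_Q2_K_S; // a very low-bit target covered by the guard
    params.imatrix = NULL;                      // deliberately no importance matrix

    // Without this flag, the guard patched above would log
    // "Missing importance matrix for tensor ..." and bail out.
    params.ignore_imatrix_rules = true;

    // Input/output paths are hypothetical, for illustration only.
    if (llama_model_quantize("model-f16.gguf", "model-q2_k_s.gguf", &params) != 0) {
        fprintf(stderr, "quantization failed\n");
        return 1;
    }
    return 0;
}

With the flag left at its default of false, the behavior is unchanged: very low-bit quantization types (IQ2_XXS, IQ2_XS, IQ2_S, IQ1_S, IQ1_M, and Q2_K under LLAMA_FTYPE_MOSTLY_Q2_K_S) still refuse to run without an importance matrix.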