author     Iwan Kawrakow <iwan.kawrakow@gmail.com>                    2024-08-05 11:59:36 +0300
committer  Kawrakow <48489457+ikawrakow@users.noreply.github.com>    2024-08-05 11:39:10 +0200
commit     b409c153636d27473970abd3a9c9400b6287d400
tree       7bdba4859b8a66fa39ec237b87db56399edacebb
parent     c11c7c8cae5ab1abf41c16b7bb27439bb0983c54
q2_K: allow it to detect ternary nets and quantize accordingly
Diffstat (limited to 'src/llama.cpp')
-rw-r--r--  src/llama.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index 2caaf7d0..e530f528 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16071,12 +16071,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
}
}
}
- if ((new_type == GGML_TYPE_IQ2_XXS ||
+ if (!params->ignore_imatrix_rules && !imatrix &&
+ (new_type == GGML_TYPE_IQ2_XXS ||
new_type == GGML_TYPE_IQ2_XS ||
new_type == GGML_TYPE_IQ2_S ||
new_type == GGML_TYPE_IQ1_S ||
(new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) ||
- (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
+ (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) {
LLAMA_LOG_ERROR("\n\n============================================================\n");
LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
@@ -16441,6 +16442,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
/*.only_copy =*/ false,
/*.pure =*/ false,
/*.keep_split =*/ false,
+ /*.ignore_imatrix_rules =*/ false,
/*.imatrix =*/ nullptr,
/*.kv_overrides =*/ nullptr,
};
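
For reference, a minimal sketch (not part of this commit) of how a caller might exercise the new flag through the public C quantization API. The GGUF file names are placeholders, and the sketch assumes this fork's llama.h, where ignore_imatrix_rules is the field the hunk above adds; everything else is the stock llama_model_quantize interface.

#include <stdio.h>
#include "llama.h"

int main(void) {
    // Start from the library defaults; this commit extends them with
    // ignore_imatrix_rules = false.
    struct llama_model_quantize_params params = llama_model_quantize_default_params();

    params.ftype   = LLAMA_FTYPE_MOSTLY_Q2_K_S; // a very low-bit target covered by the guard
    params.imatrix = NULL;                      // deliberately no importance matrix

    // Without this flag, the guard patched above would log
    // "Missing importance matrix for tensor ..." and bail out.
    params.ignore_imatrix_rules = true;

    // Input/output paths are hypothetical, for illustration only.
    if (llama_model_quantize("model-f16.gguf", "model-q2_k_s.gguf", &params) != 0) {
        fprintf(stderr, "quantization failed\n");
        return 1;
    }
    return 0;
}

With the flag left at its default of false, the behavior is unchanged: very low-bit quantization types (IQ2_XXS, IQ2_XS, IQ2_S, IQ1_S, IQ1_M, and Q2_K under LLAMA_FTYPE_MOSTLY_Q2_K_S) still refuse to run without an importance matrix.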