author     Iwan Kawrakow <iwan.kawrakow@gmail.com>                    2024-08-05 11:59:36 +0300
committer  Kawrakow <48489457+ikawrakow@users.noreply.github.com>     2024-08-05 11:39:10 +0200
commit     b409c153636d27473970abd3a9c9400b6287d400 (patch)
tree       7bdba4859b8a66fa39ec237b87db56399edacebb /src/llama.cpp
parent     c11c7c8cae5ab1abf41c16b7bb27439bb0983c54 (diff)
q2_K: allow it to detect ternary nets and quantize accordingly
Diffstat (limited to 'src/llama.cpp')
-rw-r--r--   src/llama.cpp   6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index 2caaf7d0..e530f528 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16071,12 +16071,13 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
             }
         }
     }
-    if ((new_type == GGML_TYPE_IQ2_XXS ||
+    if (!params->ignore_imatrix_rules && !imatrix &&
+        (new_type == GGML_TYPE_IQ2_XXS ||
          new_type == GGML_TYPE_IQ2_XS  ||
          new_type == GGML_TYPE_IQ2_S   ||
          new_type == GGML_TYPE_IQ1_S   ||
         (new_type == GGML_TYPE_IQ1_M && strcmp(tensor->name, "token_embd.weight") && strcmp(tensor->name, "output.weight")) ||
-        (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0)) && !imatrix) {
+        (new_type == GGML_TYPE_Q2_K && params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S && strcmp(tensor->name, "token_embd.weight") != 0))) {
         LLAMA_LOG_ERROR("\n\n============================================================\n");
         LLAMA_LOG_ERROR("Missing importance matrix for tensor %s in a very low-bit quantization\n", tensor->name);
         LLAMA_LOG_ERROR("The result will be garbage, so bailing out\n");
@@ -16441,6 +16442,7 @@ struct llama_model_quantize_params llama_model_quantize_default_params() {
        /*.only_copy            =*/ false,
        /*.pure                 =*/ false,
        /*.keep_split           =*/ false,
+       /*.ignore_imatrix_rules =*/ false,
        /*.imatrix              =*/ nullptr,
        /*.kv_overrides         =*/ nullptr,
    };
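The new ignore_imatrix_rules flag lets a caller bypass the "missing importance matrix" bail-out, e.g. when quantizing a ternary net whose weights Q2_K can represent exactly. A minimal sketch of how a caller might set it, assuming the public quantization API in llama.h (llama_model_quantize and llama_model_quantize_default_params); the file names here are placeholders, not part of this commit:

    #include "llama.h"

    int main() {
        // Start from the library defaults, which now include
        // ignore_imatrix_rules = false (see the second hunk above).
        llama_model_quantize_params params = llama_model_quantize_default_params();
        params.ftype = LLAMA_FTYPE_MOSTLY_Q2_K_S;
        // Opt out of the imatrix safety check: without this, quantizing to
        // Q2_K_S (or the IQ2/IQ1 types) with no imatrix would hit the
        // LLAMA_LOG_ERROR path above and abort.
        params.ignore_imatrix_rules = true;
        // Returns 0 on success.
        return (int) llama_model_quantize("model-f16.gguf", "model-q2_k_s.gguf", &params);
    }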