diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-08-05 11:59:36 +0300 |
---|---|---|
committer | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-08-05 11:39:10 +0200 |
commit | b409c153636d27473970abd3a9c9400b6287d400 (patch) | |
tree | 7bdba4859b8a66fa39ec237b87db56399edacebb /examples | |
parent | c11c7c8cae5ab1abf41c16b7bb27439bb0983c54 (diff) |
q2_K: allow it to detect ternary nets and quantize accordingly
Diffstat (limited to 'examples')
-rw-r--r-- | examples/quantize/quantize.cpp | 7 |
1 files changed, 5 insertions, 2 deletions
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp index 0b4c3444..bae071ce 100644 --- a/examples/quantize/quantize.cpp +++ b/examples/quantize/quantize.cpp @@ -255,6 +255,8 @@ int main(int argc, char ** argv) { for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) { if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) { params.quantize_output_tensor = false; + } else if (strcmp(argv[arg_idx], "--ignore-imatrix-rules") == 0) { + params.ignore_imatrix_rules = true; } else if (strcmp(argv[arg_idx], "--output-tensor-type") == 0) { if (arg_idx < argc-1) { params.output_tensor_type = parse_ggml_type(argv[++arg_idx]); @@ -409,11 +411,12 @@ int main(int argc, char ** argv) { } } - if ((params.ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || params.ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || + if (!params.ignore_imatrix_rules && imatrix_data.empty() && + (params.ftype == LLAMA_FTYPE_MOSTLY_IQ2_XS || params.ftype == LLAMA_FTYPE_MOSTLY_IQ2_XXS || params.ftype == LLAMA_FTYPE_MOSTLY_IQ2_S || params.ftype == LLAMA_FTYPE_MOSTLY_Q2_K_S || params.ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || - params.ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) && imatrix_data.empty()) { + params.ftype == LLAMA_FTYPE_MOSTLY_IQ1_M)) { fprintf(stderr, "\n==========================================================================================================\n"); fprintf(stderr, "Please do not use IQ1_S, IQ1_M, IQ2_S, IQ2_XXS, IQ2_XS or Q2_K_S quantization without an importance matrix\n"); fprintf(stderr, "==========================================================================================================\n\n\n"); |