q2_K: allow it to detect ternary nets and quantize accordingly

author: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-08-05 11:59:36 +0300
committer: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-08-05 11:39:10 +0200
commit: b409c153636d27473970abd3a9c9400b6287d400 (patch)
tree: 7bdba4859b8a66fa39ec237b87db56399edacebb /include/llama.h
parent: c11c7c8cae5ab1abf41c16b7bb27439bb0983c54 (diff)
1 file changed, 1 insertion, 0 deletions
diff --git a/include/llama.h b/include/llama.h
index 88d82958..15ff915b 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -359,6 +359,7 @@ extern "C" {
         bool only_copy;                      // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         bool pure;                           // quantize all tensors to the default type
         bool keep_split;                     // quantize to the same number of shards
+        bool ignore_imatrix_rules;           // If set to true, the built-in rules for refusing to quantize into certain quants without imatrix are ignored
         void * imatrix;                      // pointer to importance matrix data
         void * kv_overrides;                 // pointer to vector containing overrides
     } llama_model_quantize_params;
author	Iwan Kawrakow <iwan.kawrakow@gmail.com>	2024-08-05 11:59:36 +0300
committer	Kawrakow <48489457+ikawrakow@users.noreply.github.com>	2024-08-05 11:39:10 +0200
commit	b409c153636d27473970abd3a9c9400b6287d400 (patch)
tree	7bdba4859b8a66fa39ec237b87db56399edacebb /include/llama.h
parent	c11c7c8cae5ab1abf41c16b7bb27439bb0983c54 (diff)