diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-08-05 11:59:36 +0300 |
---|---|---|
committer | Kawrakow <48489457+ikawrakow@users.noreply.github.com> | 2024-08-05 11:39:10 +0200 |
commit | b409c153636d27473970abd3a9c9400b6287d400 (patch) | |
tree | 7bdba4859b8a66fa39ec237b87db56399edacebb /include/llama.h | |
parent | c11c7c8cae5ab1abf41c16b7bb27439bb0983c54 (diff) |
q2_K: allow it to detect ternary nets and quantize accordingly
Diffstat (limited to 'include/llama.h')
-rw-r--r-- | include/llama.h | 1 |
1 files changed, 1 insertions, 0 deletions
```diff
diff --git a/include/llama.h b/include/llama.h
index 88d82958..15ff915b 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -359,6 +359,7 @@ extern "C" {
         bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
         bool pure; // quantize all tensors to the default type
         bool keep_split; // quantize to the same number of shards
+        bool ignore_imatrix_rules; // If set to true, the built-in rules for refusing to quantize into certain quants without imatrix are ignored
         void * imatrix; // pointer to importance matrix data
         void * kv_overrides; // pointer to vector containing overrides
     } llama_model_quantize_params;
```