path: root/llama.h
author    Kawrakow <48489457+ikawrakow@users.noreply.github.com>  2024-01-14 09:45:56 +0200
committer GitHub <noreply@github.com>                             2024-01-14 09:45:56 +0200
commit    147b17ac94a24d524e367cda26a9ff6245689f34 (patch)
tree      6bae34826f82aa28a60ccb26de8eda0464774110 /llama.h
parent    807179ec583dcb882f97d9704577c06beb2c5ec9 (diff)
2-bit quantizations (#4897)
* imatrix: load
* imatrix: WIP
* imatrix: Add Q2_K quantization
* imatrix: also guard against Q2_K_S quantization without importance matrix
* imatrix: guard even more against low-bit quantization misuse
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
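The guards mentioned in the commit message live in the quantization code, not in this header. As a rough, hypothetical sketch only (not code from this patch), such a check against the struct shown in the diff below could look like this; LLAMA_FTYPE_MOSTLY_Q2_K is an existing llama.h ftype, while check_imatrix_guard and the exact set of guarded types are assumptions for illustration:

#include <stdio.h>
#include "llama.h"

// Hypothetical sketch of the guard the commit message describes: refuse
// very low-bit quantization when no importance matrix was supplied.
static int check_imatrix_guard(const llama_model_quantize_params * params) {
    // extend with other sub-3-bit types as needed (assumption)
    const int is_low_bit = params->ftype == LLAMA_FTYPE_MOSTLY_Q2_K;
    if (is_low_bit && params->imatrix == NULL) {
        fprintf(stderr, "error: 2-bit quantization requires an importance matrix\n");
        return 1; // reject the request
    }
    return 0;
}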
Diffstat (limited to 'llama.h')
-rw-r--r--  llama.h | 1 +
1 file changed, 1 insertion, 0 deletions
diff --git a/llama.h b/llama.h
index 01d6fafa..79c8335b 100644
--- a/llama.h
+++ b/llama.h
@@ -249,6 +249,7 @@ extern "C" {
bool quantize_output_tensor; // quantize output.weight
bool only_copy; // only copy tensors - ftype, allow_requantize and quantize_output_tensor are ignored
bool pure; // disable k-quant mixtures and quantize all tensors to the same type
+ void * imatrix; // pointer to importance matrix data
} llama_model_quantize_params;
// grammar types
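For callers, the new field slots into the existing quantization entry point. The following is a minimal, hedged sketch (not code from this patch) of how a tool might thread importance-matrix data through the C API; llama_model_quantize() and llama_model_quantize_default_params() are existing llama.h functions, while quantize_with_imatrix and the buffer behind imatrix_data are assumptions for illustration:

#include "llama.h"

// Sketch only: quantize a model to 2 bits, passing an already-loaded
// importance matrix through the new params field. The library treats the
// pointer as opaque; its layout must match what the quantization code expects.
int quantize_with_imatrix(const char * fname_inp, const char * fname_out,
                          void * imatrix_data) {
    llama_model_quantize_params params = llama_model_quantize_default_params();
    params.ftype   = LLAMA_FTYPE_MOSTLY_Q2_K; // a 2-bit k-quant type
    params.imatrix = imatrix_data;            // NULL = quantize without an imatrix
    // llama_model_quantize() returns 0 on success
    return llama_model_quantize(fname_inp, fname_out, &params) == 0 ? 0 : 1;
}

Leaving imatrix as NULL preserves the old behavior; per the commit message, the low-bit paths now guard against that case rather than silently producing poor-quality 2-bit quantizations.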