summary | refs | log | tree | commit | diff
path: root/llama.h
diff options
context:
space:
mode:
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-02-26 18:28:38 +0200
committer: GitHub <noreply@github.com> 2024-02-26 18:28:38 +0200
commit: a33e6a0d2a66104ea9a906bdbf8a94d050189d91 (patch)
tree: 30478b4a0b1792d1af66c5d64e2c3c4fa1af74ab /llama.h
parent: 47bb7b48c7cec9d8f57d56812ce811ec130b89a3 (diff)
Adding IQ2_S and IQ2_M to complete coverage of the 2-3 bit quantization range (#5721)
* Adding IQ2_S and IQ2_M as a single cumulative commit

* Update examples/quantize/quantize.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.h')
-rw-r--r-- llama.h 4
1 files changed, 3 insertions, 1 deletions
diff --git a/llama.h b/llama.h
index ff131996..3ff77d5a 100644
--- a/llama.h
+++ b/llama.h
@@ -107,12 +107,14 @@ extern "C" {
LLAMA_FTYPE_MOSTLY_IQ2_XXS = 19, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ2_XS = 20, // except 1d tensors
LLAMA_FTYPE_MOSTLY_Q2_K_S = 21, // except 1d tensors
- LLAMA_FTYPE_MOSTLY_Q3_K_XS = 22, // except 1d tensors
+ LLAMA_FTYPE_MOSTLY_IQ3_XS = 22, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ3_XXS = 23, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ1_S = 24, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ4_NL = 25, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ3_S = 26, // except 1d tensors
LLAMA_FTYPE_MOSTLY_IQ3_M = 27, // except 1d tensors
+ LLAMA_FTYPE_MOSTLY_IQ2_S = 28, // except 1d tensors
+ LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
};