summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNexesenex <124105151+Nexesenex@users.noreply.github.com>2024-03-22 14:32:02 +0100
committerGitHub <noreply@github.com>2024-03-22 15:32:02 +0200
commite80f06d2a194be62ab5b1cd7ef7c7a5b241dd4fb (patch)
treeecd5d09b0c627826dd50d419f8cec057c4805d11
parentf77a8ffd3bbde77b7819823b0c006fd8c2d5cae4 (diff)
llama : correction of the attn.v.weight quantization for IQ3_XS (#6209)
IQ3_XS was not mentioned, while IQ3_S and IQ3_M were each present twice. This PR corrects that in the manner which was probably intended initially.
-rw-r--r--llama.cpp8
1 file changed, 1 insertion, 7 deletions
diff --git a/llama.cpp b/llama.cpp
index 9de4a860..91bd6b8d 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -12027,13 +12027,7 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_XXS) {
new_type = qs.model.hparams.n_gqa() >= 4 ? GGML_TYPE_Q4_K : !qs.has_imatrix ? GGML_TYPE_IQ3_S : GGML_TYPE_IQ3_XXS;
}
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
- new_type = GGML_TYPE_Q4_K;
- }
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {
- new_type = GGML_TYPE_Q4_K;
- }
- else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_S && qs.model.hparams.n_gqa() >= 4) {
+ else if ((ftype == LLAMA_FTYPE_MOSTLY_IQ3_XS || ftype == LLAMA_FTYPE_MOSTLY_IQ3_S) && qs.model.hparams.n_gqa() >= 4) {
new_type = GGML_TYPE_Q4_K;
}
else if (ftype == LLAMA_FTYPE_MOSTLY_IQ3_M) {