summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKawrakow <48489457+ikawrakow@users.noreply.github.com>2024-01-14 09:44:30 +0200
committerGitHub <noreply@github.com>2024-01-14 09:44:30 +0200
commit807179ec583dcb882f97d9704577c06beb2c5ec9 (patch)
tree2eb5a87fc948df5ac14c29e694661418a38e353c
parent76484fbfd355df388f71d6edaa98e1692a74de7e (diff)
Make Q3_K_S be the same as olf Q3_K_L for Mixtral-8x7B (#4906)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r--llama.cpp13
1 files changed, 10 insertions, 3 deletions
diff --git a/llama.cpp b/llama.cpp
index 66494974..8e20e72a 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -8489,9 +8489,16 @@ static ggml_type get_k_quant_type(quantize_state_internal & qs, ggml_type new_ty
++qs.i_feed_forward_w2;
} else if (name.find("attn_output.weight") != std::string::npos) {
if (arch != LLM_ARCH_FALCON) {
- if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
- else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) new_type = GGML_TYPE_Q4_K;
- else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
+ if (qs.model.hparams.n_expert == 8) {
+ if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M ||
+ ftype == LLAMA_FTYPE_MOSTLY_Q4_K_S || ftype == LLAMA_FTYPE_MOSTLY_Q4_K_M) {
+ new_type = GGML_TYPE_Q5_K;
+ }
+ } else {
+ if (ftype == LLAMA_FTYPE_MOSTLY_Q2_K ) new_type = GGML_TYPE_Q3_K;
+ else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_M) new_type = GGML_TYPE_Q4_K;
+ else if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q5_K;
+ }
} else {
if (ftype == LLAMA_FTYPE_MOSTLY_Q3_K_L) new_type = GGML_TYPE_Q4_K;
}