From 4b45b82e67d9362e7522e5c7107e9d99219e0432 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Thu, 20 Feb 2025 17:42:07 +0200
Subject: Honor attn_output specified in the command line also for low-bit quants

---
 src/llama.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)
(limited to 'src')

diff --git a/src/llama.cpp b/src/llama.cpp
index 0257a0a3..28e887ee 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16339,7 +16339,8 @@ static ggml_type llama_tensor_get_type(quantize_state_internal & qs, ggml_type n
             ++qs.i_ffn_down;
         }
         else if (name.find("attn_output.weight") != std::string::npos) {
-            if (qs.model.hparams.n_expert >= 4) {
+            if (qs.params->attn_output_type < GGML_TYPE_COUNT) new_type = qs.params->attn_output_type;
+            else if (qs.model.hparams.n_expert >= 4) {
                 new_type = GGML_TYPE_Q5_K;
             } else {
                 if (ftype == LLAMA_FTYPE_MOSTLY_IQ1_S || ftype == LLAMA_FTYPE_MOSTLY_IQ1_M) new_type = GGML_TYPE_IQ2_K;
-- 
cgit v1.2.3