diff options
Diffstat (limited to 'src/llama.cpp')
-rw-r--r-- | src/llama.cpp | 8 |
1 file changed, 4 insertions, 4 deletions
diff --git a/src/llama.cpp b/src/llama.cpp index c2bc5cc0..836fd97a 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -16906,8 +16906,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s else chunk_size_multiplier = 4; } else if (new_type == GGML_TYPE_IQ4_XS_R4) { - if (tensor->ne[1] % 4 != 0) new_type = GGML_TYPE_IQ4_XS; - else chunk_size_multiplier = 4; + if (tensor->ne[1] % 8 != 0) new_type = GGML_TYPE_IQ4_XS; + else chunk_size_multiplier = 8; } else if (new_type == GGML_TYPE_Q4_0_R4) { if (tensor->ne[1] % 4 != 0) new_type = GGML_TYPE_Q4_0; @@ -16922,8 +16922,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s else chunk_size_multiplier = 4; } else if (new_type == GGML_TYPE_Q8_0_R4) { - if (tensor->ne[1] % 4 != 0) new_type = GGML_TYPE_Q8_0; - else chunk_size_multiplier = 4; + if (tensor->ne[1] % 8 != 0) new_type = GGML_TYPE_Q8_0; + else chunk_size_multiplier = 8; } else if (new_type == GGML_TYPE_Q2_K_R4) { if (tensor->ne[1] % 4 != 0) new_type = GGML_TYPE_Q2_K; |