summaryrefslogtreecommitdiff
path: root/src/llama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/llama.cpp')
-rw-r--r-- src/llama.cpp | 8
1 files changed, 4 insertions, 4 deletions
diff --git a/src/llama.cpp b/src/llama.cpp
index c2bc5cc0..836fd97a 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -16906,8 +16906,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
else chunk_size_multiplier = 4;
}
else if (new_type == GGML_TYPE_IQ4_XS_R4) {
- if (tensor->ne[1] % 4 != 0) new_type = GGML_TYPE_IQ4_XS;
- else chunk_size_multiplier = 4;
+ if (tensor->ne[1] % 8 != 0) new_type = GGML_TYPE_IQ4_XS;
+ else chunk_size_multiplier = 8;
}
else if (new_type == GGML_TYPE_Q4_0_R4) {
if (tensor->ne[1] % 4 != 0) new_type = GGML_TYPE_Q4_0;
@@ -16922,8 +16922,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
else chunk_size_multiplier = 4;
}
else if (new_type == GGML_TYPE_Q8_0_R4) {
- if (tensor->ne[1] % 4 != 0) new_type = GGML_TYPE_Q8_0;
- else chunk_size_multiplier = 4;
+ if (tensor->ne[1] % 8 != 0) new_type = GGML_TYPE_Q8_0;
+ else chunk_size_multiplier = 8;
}
else if (new_type == GGML_TYPE_Q2_K_R4) {
if (tensor->ne[1] % 4 != 0) new_type = GGML_TYPE_Q2_K;