summaryrefslogtreecommitdiff
path: root/ggml.c
diff options
context:
space:
mode:
Diffstat (limited to 'ggml.c')
-rw-r--r--ggml.c28
1 files changed, 20 insertions, 8 deletions
diff --git a/ggml.c b/ggml.c
index 52467475..ef5888ab 100644
--- a/ggml.c
+++ b/ggml.c
@@ -18713,26 +18713,38 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
case GGML_TYPE_Q3_K:
{
GGML_ASSERT(start % QK_K == 0);
- block_q3_K * block = (block_q3_K*)dst + start / QK_K;
- result = ggml_quantize_q3_K(src + start, block, n, n, hist);
+ GGML_ASSERT(start % n_per_row == 0);
+ size_t start_row = start / n_per_row;
+ size_t row_size = ggml_row_size(type, n_per_row);
+ result = quantize_q3_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+ GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q4_K:
{
GGML_ASSERT(start % QK_K == 0);
- block_q4_K * block = (block_q4_K*)dst + start / QK_K;
- result = ggml_quantize_q4_K(src + start, block, n, n, hist);
+ GGML_ASSERT(start % n_per_row == 0);
+ size_t start_row = start / n_per_row;
+ size_t row_size = ggml_row_size(type, n_per_row);
+ result = quantize_q4_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+ GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q5_K:
{
GGML_ASSERT(start % QK_K == 0);
- block_q5_K * block = (block_q5_K*)dst + start / QK_K;
- result = ggml_quantize_q5_K(src + start, block, n, n, hist);
+ GGML_ASSERT(start % n_per_row == 0);
+ size_t start_row = start / n_per_row;
+ size_t row_size = ggml_row_size(type, n_per_row);
+ result = quantize_q5_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+ GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q6_K:
{
GGML_ASSERT(start % QK_K == 0);
- block_q6_K * block = (block_q6_K*)dst + start / QK_K;
- result = ggml_quantize_q6_K(src + start, block, n, n, hist);
+ GGML_ASSERT(start % n_per_row == 0);
+ size_t start_row = start / n_per_row;
+ size_t row_size = ggml_row_size(type, n_per_row);
+ result = quantize_q6_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+ GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_IQ2_XXS:
{