summary | refs | log | tree | commit | diff
path: root/ggml.c
diff options
context:
space:
mode:
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-01-14 16:21:12 +0200
committer: GitHub <noreply@github.com> 2024-01-14 16:21:12 +0200
commit: 467a882fd2e5b6172897b49aa45aa29bd3f27685 (patch)
tree: 39f03df9b8418028c59380a8a4555395ba13f685 /ggml.c
parent: bb0c1392479398f9aba86d9ec98db0b95ede6e6d (diff)
Add ability to use importance matrix for all k-quants (#4930)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml.c')
-rw-r--r-- ggml.c 28
1 files changed, 20 insertions, 8 deletions
diff --git a/ggml.c b/ggml.c
index 52467475..ef5888ab 100644
--- a/ggml.c
+++ b/ggml.c
@@ -18713,26 +18713,38 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
case GGML_TYPE_Q3_K:
{
GGML_ASSERT(start % QK_K == 0);
- block_q3_K * block = (block_q3_K*)dst + start / QK_K;
- result = ggml_quantize_q3_K(src + start, block, n, n, hist);
+ GGML_ASSERT(start % n_per_row == 0);
+ size_t start_row = start / n_per_row;
+ size_t row_size = ggml_row_size(type, n_per_row);
+ result = quantize_q3_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+ GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q4_K:
{
GGML_ASSERT(start % QK_K == 0);
- block_q4_K * block = (block_q4_K*)dst + start / QK_K;
- result = ggml_quantize_q4_K(src + start, block, n, n, hist);
+ GGML_ASSERT(start % n_per_row == 0);
+ size_t start_row = start / n_per_row;
+ size_t row_size = ggml_row_size(type, n_per_row);
+ result = quantize_q4_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+ GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q5_K:
{
GGML_ASSERT(start % QK_K == 0);
- block_q5_K * block = (block_q5_K*)dst + start / QK_K;
- result = ggml_quantize_q5_K(src + start, block, n, n, hist);
+ GGML_ASSERT(start % n_per_row == 0);
+ size_t start_row = start / n_per_row;
+ size_t row_size = ggml_row_size(type, n_per_row);
+ result = quantize_q5_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+ GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q6_K:
{
GGML_ASSERT(start % QK_K == 0);
- block_q6_K * block = (block_q6_K*)dst + start / QK_K;
- result = ggml_quantize_q6_K(src + start, block, n, n, hist);
+ GGML_ASSERT(start % n_per_row == 0);
+ size_t start_row = start / n_per_row;
+ size_t row_size = ggml_row_size(type, n_per_row);
+ result = quantize_q6_K(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
+ GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_IQ2_XXS:
{