summaryrefslogtreecommitdiff
path: root/ggml/src/ggml-quants.c
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src/ggml-quants.c')
-rw-r--r--ggml/src/ggml-quants.c289
1 files changed, 203 insertions, 86 deletions
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index 391d9e2e..3c4711f3 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -13991,6 +13991,105 @@ static int iq1_sort_helper(const void * left, const void * right) {
return *l < *r ? -1 : *l > *r ? 1 : 0;
}
+void iq1s_process_1block(int block_size, const float * xb, const float * weight, int8_t * L, float * the_scale, uint16_t * the_index, int * the_shift,
+ float * pairs, float * sumx, float * sumw) {
+ float max = fabsf(xb[0]);
+ for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
+ if (max < GROUP_MAX_EPS_IQ1_S) {
+ *the_scale = 0;
+ *the_shift = 1;
+ for (int k = 0; k < block_size/8; ++k) the_index[k] = 0;
+ return;
+ }
+ const int gindex = iq2_data_index(GGML_TYPE_IQ1_S);
+ const uint64_t * kgrid_q2xs = iq2_data[gindex].grid;
+ const int * kmap_q2xs = iq2_data[gindex].map;
+ const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;
+
+ GGML_ASSERT(kgrid_q2xs && "forgot to call ggml_quantize_init()?");
+ GGML_ASSERT(kmap_q2xs && "forgot to call ggml_quantize_init()?");
+ GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
+
+ const float x_p[3] = {-1 + IQ1S_DELTA, IQ1S_DELTA, 1 + IQ1S_DELTA};
+ const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
+
+ // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
+ // With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
+ // boundaries that split the weights xb[i] into 3 groups. To do so, we sort the weights
+ // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
+ // Wi = sum[weight[j], j = 0...i], and use these to quckly get get the optimum scale
+ // for each possible and score for each split.
+ int * idx = (int *)(pairs + 1);
+ for (int j = 0; j < block_size; ++j) {
+ pairs[2*j] = xb[j];
+ idx[2*j] = j;
+ }
+ qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
+ {
+ sumx[0] = sumw[0] = 0;
+ for (int j = 0; j < block_size; ++j) {
+ int i = idx[2*j];
+ sumx[j+1] = sumx[j] + weight[i]*xb[i];
+ sumw[j+1] = sumw[j] + weight[i];
+ }
+ }
+ float best_score = -FLT_MIN, scale = max;
+ int besti1 = -1, besti2 = -1, best_shift = 0;
+ for (int i1 = 0; i1 <= block_size; ++i1) {
+ for (int i2 = i1; i2 <= block_size; ++i2) {
+ float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[block_size] - sumx[i2])*x_p[2];
+ float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[block_size] - sumw[i2])*x_p[2]*x_p[2];
+ if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
+ scale = sumqx/sumq2; best_score = scale*sumqx;
+ besti1 = i1; besti2 = i2; best_shift = 1;
+ }
+ sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[block_size] - sumx[i2])*x_m[2];
+ sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[block_size] - sumw[i2])*x_m[2]*x_m[2];
+ if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
+ scale = sumqx/sumq2; best_score = scale*sumqx;
+ besti1 = i1; besti2 = i2; best_shift = -1;
+ }
+ }
+ }
+ GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
+ for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
+ for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
+ for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
+ if (scale < 0) {
+ for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
+ scale = -scale; best_shift = -best_shift;
+ }
+ bool all_on_grid = true;
+ const float * xx = best_shift == 1 ? x_p : x_m;
+ for (int k = 0; k < block_size/8; ++k) {
+ uint16_t u = 0;
+ for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
+ int grid_index = kmap_q2xs[u];
+ if (grid_index < 0) {
+ all_on_grid = false;
+ const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
+ grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
+ GGML_ASSERT(grid_index >= 0);
+ }
+ the_index[k] = grid_index;
+ }
+ if (!all_on_grid) {
+ float sumqx = 0, sumq2 = 0;
+ for (int k = 0; k < block_size/8; ++k) {
+ const int8_t * pg = (const int8_t *)(kgrid_q2xs + the_index[k]);
+ for (int j = 0; j < 8; ++j) {
+ float w = weight[8*k + j];
+ float q = xx[(pg[j] - 1)/2];
+ sumqx += w*q*xb[8*k+j];
+ sumq2 += w*q*q;
+ }
+ }
+ if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
+ }
+ *the_scale = scale;
+ *the_shift = best_shift;
+}
+
#define IQ1S_BLOCK_SIZE 32
#define IQ1M_BLOCK_SIZE 16
static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
@@ -14021,11 +14120,10 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
const int block_size = IQ1S_BLOCK_SIZE;
- const float x_p[3] = {-1 + IQ1S_DELTA, IQ1S_DELTA, 1 + IQ1S_DELTA};
- const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
-
+ //const float x_p[3] = {-1 + IQ1S_DELTA, IQ1S_DELTA, 1 + IQ1S_DELTA};
+ //const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
- int * idx = (int *)(pairs + 1);
+ //int * idx = (int *)(pairs + 1);
for (int ibl = 0; ibl < nbl; ++ibl) {
@@ -14044,95 +14142,100 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
const float * xb = xbl + block_size*ib;
const float * qw = quant_weights + QK_K*ibl + block_size*ib;
for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
- float max = fabsf(xb[0]);
- for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
- if (max < GROUP_MAX_EPS_IQ1_S) {
- scales[ib] = 0;
- memset(L, 1, block_size);
- continue;
- }
- // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
- // With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
- // boundaries that split the weights xb[i] into 3 groups. To do so, we sort the weights
- // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
- // Wi = sum[weight[j], j = 0...i], and use these to quckly get get the optimum scale
- // for each possible and score for each split.
- for (int j = 0; j < block_size; ++j) {
- pairs[2*j] = xb[j];
- idx[2*j] = j;
- }
- qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
- {
- sumx[0] = sumw[0] = 0;
- for (int j = 0; j < block_size; ++j) {
- int i = idx[2*j];
- sumx[j+1] = sumx[j] + weight[i]*xb[i];
- sumw[j+1] = sumw[j] + weight[i];
- }
- }
- float best_score = -FLT_MIN, scale = max;
- int besti1 = -1, besti2 = -1, best_shift = 0;
- for (int i1 = 0; i1 <= block_size; ++i1) {
- for (int i2 = i1; i2 <= block_size; ++i2) {
- float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[block_size] - sumx[i2])*x_p[2];
- float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[block_size] - sumw[i2])*x_p[2]*x_p[2];
- if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
- scale = sumqx/sumq2; best_score = scale*sumqx;
- besti1 = i1; besti2 = i2; best_shift = 1;
- }
- sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[block_size] - sumx[i2])*x_m[2];
- sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[block_size] - sumw[i2])*x_m[2]*x_m[2];
- if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
- scale = sumqx/sumq2; best_score = scale*sumqx;
- besti1 = i1; besti2 = i2; best_shift = -1;
- }
- }
- }
- GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
- for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
- for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
- for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
- if (scale < 0) {
- for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
- scale = -scale; best_shift = -best_shift;
- }
- bool all_on_grid = true;
- const float * xx = best_shift == 1 ? x_p : x_m;
- for (int k = 0; k < block_size/8; ++k) {
- uint16_t u = 0;
- for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
- int grid_index = kmap_q2xs[u];
- if (grid_index < 0) {
- all_on_grid = false;
- const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
- grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
- GGML_ASSERT(grid_index >= 0);
- }
- index[k] = grid_index;
- }
- if (!all_on_grid) {
- float sumqx = 0, sumq2 = 0;
- for (int k = 0; k < block_size/8; ++k) {
- const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
- for (int j = 0; j < 8; ++j) {
- float w = weight[8*k + j];
- float q = xx[(pg[j] - 1)/2];
- sumqx += w*q*xb[8*k+j];
- sumq2 += w*q*q;
- }
- }
- if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
- }
+ int best_shift;
+ iq1s_process_1block(block_size, xb, weight, L, &scales[ib], index, &best_shift, pairs, sumx, sumw);
+
+// float max = fabsf(xb[0]);
+// for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
+// if (max < GROUP_MAX_EPS_IQ1_S) {
+// scales[ib] = 0;
+// memset(L, 1, block_size);
+// continue;
+// }
+// // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
+// // With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
+// // boundaries that split the weights xb[i] into 3 groups. To do so, we sort the weights
+// // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
+// // Wi = sum[weight[j], j = 0...i], and use these to quckly get get the optimum scale
+// // for each possible and score for each split.
+// for (int j = 0; j < block_size; ++j) {
+// pairs[2*j] = xb[j];
+// idx[2*j] = j;
+// }
+// qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
+// {
+// sumx[0] = sumw[0] = 0;
+// for (int j = 0; j < block_size; ++j) {
+// int i = idx[2*j];
+// sumx[j+1] = sumx[j] + weight[i]*xb[i];
+// sumw[j+1] = sumw[j] + weight[i];
+// }
+// }
+// float best_score = -FLT_MIN, scale = max;
+// int besti1 = -1, besti2 = -1, best_shift = 0;
+// for (int i1 = 0; i1 <= block_size; ++i1) {
+// for (int i2 = i1; i2 <= block_size; ++i2) {
+// float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[block_size] - sumx[i2])*x_p[2];
+// float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[block_size] - sumw[i2])*x_p[2]*x_p[2];
+// if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
+// scale = sumqx/sumq2; best_score = scale*sumqx;
+// besti1 = i1; besti2 = i2; best_shift = 1;
+// }
+// sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[block_size] - sumx[i2])*x_m[2];
+// sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[block_size] - sumw[i2])*x_m[2]*x_m[2];
+// if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
+// scale = sumqx/sumq2; best_score = scale*sumqx;
+// besti1 = i1; besti2 = i2; best_shift = -1;
+// }
+// }
+// }
+// GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
+// for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
+// for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
+// for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
+// if (scale < 0) {
+// for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
+// scale = -scale; best_shift = -best_shift;
+// }
+// bool all_on_grid = true;
+// const float * xx = best_shift == 1 ? x_p : x_m;
+// for (int k = 0; k < block_size/8; ++k) {
+// uint16_t u = 0;
+// for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
+// int grid_index = kmap_q2xs[u];
+// if (grid_index < 0) {
+// all_on_grid = false;
+// const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
+// grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
+// GGML_ASSERT(grid_index >= 0);
+// }
+// index[k] = grid_index;
+// }
+// if (!all_on_grid) {
+// float sumqx = 0, sumq2 = 0;
+// for (int k = 0; k < block_size/8; ++k) {
+// const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
+// for (int j = 0; j < 8; ++j) {
+// float w = weight[8*k + j];
+// float q = xx[(pg[j] - 1)/2];
+// sumqx += w*q*xb[8*k+j];
+// sumq2 += w*q*q;
+// }
+// }
+// if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
+// }
uint16_t h = 0;
for (int k = 0; k < block_size/8; ++k) {
y[ibl].qs[(block_size/8)*ib + k] = index[k] & 255;
h |= (index[k] >> 8) << 3*k;
}
y[ibl].qh[ib] = h;
- GGML_ASSERT(scale >= 0);
- scales[ib] = scale;
+ GGML_ASSERT(scales[ib] >= 0);
+ max_scale = MAX(max_scale, scales[ib]);
+ //GGML_ASSERT(scale >= 0);
+ //scales[ib] = scale;
shifts[ib] = best_shift;
- max_scale = MAX(max_scale, scale);
+ //max_scale = MAX(max_scale, scale);
}
if (!max_scale) {
@@ -14171,6 +14274,19 @@ size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t n
return nrow * nblock * sizeof(block_iq1_s);
}
+void quantize_row_iq1_s_ref (const float * GGML_RESTRICT x, block_iq1_s * GGML_RESTRICT y, int64_t k) {
+ int nblock = k/QK_K;
+ float qw[QK_K];
+ for (int j = 0; j < QK_K; ++j) qw[j] = 1;
+ for (int ibl = 0; ibl < nblock; ++ibl) {
+ quantize_iq1_s(x + ibl*QK_K, &y[ibl], 1, QK_K, qw);
+ }
+}
+
+void quantize_row_iq1_s (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
+ quantize_row_iq1_s_ref(x, (block_iq1_s *)y, k);
+}
+
static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
float * scales,
float * weight,
@@ -15129,6 +15245,7 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
case GGML_TYPE_IQ3_XXS_R4: break;
case GGML_TYPE_IQ3_S_R4: break;
case GGML_TYPE_IQ2_S_R4: break;
+ case GGML_TYPE_IQ1_S_R4: break;
case GGML_TYPE_Q4_0_R4: break;
case GGML_TYPE_Q5_0_R4: break;
case GGML_TYPE_Q6_0_R4: break;