1 files changed, 203 insertions, 86 deletions
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index 391d9e2e..3c4711f3 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -13991,6 +13991,105 @@ static int iq1_sort_helper(const void * left, const void * right) {
     return *l < *r ? -1 : *l > *r ? 1 : 0;
 }
 
+void iq1s_process_1block(int block_size, const float * xb, const float * weight, int8_t * L, float * the_scale, uint16_t * the_index, int * the_shift,
+        float * pairs, float * sumx, float * sumw) {
+    float max = fabsf(xb[0]);
+    for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
+    if (max < GROUP_MAX_EPS_IQ1_S) {
+        *the_scale = 0;
+        *the_shift = 1;
+        for (int k = 0; k < block_size/8; ++k) the_index[k] = 0;
+        return;
+    }
+    const int gindex = iq2_data_index(GGML_TYPE_IQ1_S);
+    const uint64_t * kgrid_q2xs      = iq2_data[gindex].grid;
+    const int      * kmap_q2xs       = iq2_data[gindex].map;
+    const uint16_t * kneighbors_q2xs = iq2_data[gindex].neighbours;
+
+    GGML_ASSERT(kgrid_q2xs      && "forgot to call ggml_quantize_init()?");
+    GGML_ASSERT(kmap_q2xs       && "forgot to call ggml_quantize_init()?");
+    GGML_ASSERT(kneighbors_q2xs && "forgot to call ggml_quantize_init()?");
+
+    const float x_p[3] = {-1 + IQ1S_DELTA,  IQ1S_DELTA, 1 + IQ1S_DELTA};
+    const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
+
+    // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
+    // With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
+    // boundaries that split the weights xb[i] into 3 groups. To do so, we sort the weights
+    // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
+    // Wi = sum[weight[j], j = 0...i], and use these to quckly get get the optimum scale
+    // for each possible and score for each split.
+    int * idx = (int *)(pairs + 1);
+    for (int j = 0; j < block_size; ++j) {
+        pairs[2*j] = xb[j];
+        idx[2*j] = j;
+    }
+    qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
+    {
+        sumx[0] = sumw[0] = 0;
+        for (int j = 0; j < block_size; ++j) {
+            int i = idx[2*j];
+            sumx[j+1] = sumx[j] + weight[i]*xb[i];
+            sumw[j+1] = sumw[j] + weight[i];
+        }
+    }
+    float best_score = -FLT_MIN, scale = max;
+    int besti1 = -1, besti2 = -1, best_shift = 0;
+    for (int i1 = 0; i1 <= block_size; ++i1) {
+        for (int i2 = i1; i2 <= block_size; ++i2) {
+            float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[block_size] - sumx[i2])*x_p[2];
+            float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[block_size] - sumw[i2])*x_p[2]*x_p[2];
+            if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
+                scale = sumqx/sumq2; best_score = scale*sumqx;
+                besti1 = i1; besti2 = i2; best_shift = 1;
+            }
+            sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[block_size] - sumx[i2])*x_m[2];
+            sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[block_size] - sumw[i2])*x_m[2]*x_m[2];
+            if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
+                scale = sumqx/sumq2; best_score = scale*sumqx;
+                besti1 = i1; besti2 = i2; best_shift = -1;
+            }
+        }
+    }
+    GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
+    for (int j =      0; j < besti1; ++j) L[idx[2*j]] = 0;
+    for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
+    for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
+    if (scale < 0) {
+        for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
+        scale = -scale; best_shift = -best_shift;
+    }
+    bool all_on_grid = true;
+    const float * xx = best_shift == 1 ? x_p : x_m;
+    for (int k = 0; k < block_size/8; ++k) {
+        uint16_t u = 0;
+        for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
+        int grid_index = kmap_q2xs[u];
+        if (grid_index < 0) {
+            all_on_grid = false;
+            const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
+            grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
+            GGML_ASSERT(grid_index >= 0);
+        }
+        the_index[k] = grid_index;
+    }
+    if (!all_on_grid) {
+        float sumqx = 0, sumq2 = 0;
+        for (int k = 0; k < block_size/8; ++k) {
+            const int8_t * pg = (const int8_t *)(kgrid_q2xs + the_index[k]);
+            for (int j = 0; j < 8; ++j) {
+                float w = weight[8*k + j];
+                float q = xx[(pg[j] - 1)/2];
+                sumqx += w*q*xb[8*k+j];
+                sumq2 += w*q*q;
+            }
+        }
+        if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
+    }
+    *the_scale = scale;
+    *the_shift = best_shift;
+}
+
 #define IQ1S_BLOCK_SIZE 32
 #define IQ1M_BLOCK_SIZE 16
 static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
@@ -14021,11 +14120,10 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
 
     const int block_size = IQ1S_BLOCK_SIZE;
 
-    const float x_p[3] = {-1 + IQ1S_DELTA,  IQ1S_DELTA, 1 + IQ1S_DELTA};
-    const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
-
+    //const float x_p[3] = {-1 + IQ1S_DELTA,  IQ1S_DELTA, 1 + IQ1S_DELTA};
+    //const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
 
-    int * idx = (int *)(pairs + 1);
+    //int * idx = (int *)(pairs + 1);
 
     for (int ibl = 0; ibl < nbl; ++ibl) {
 
@@ -14044,95 +14142,100 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
             const float * xb = xbl + block_size*ib;
             const float * qw = quant_weights + QK_K*ibl + block_size*ib;
             for (int i = 0; i < block_size; ++i) weight[i] = qw[i] * sqrtf(sigma2 + xb[i]*xb[i]);
-            float max = fabsf(xb[0]);
-            for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
-            if (max < GROUP_MAX_EPS_IQ1_S) {
-                scales[ib] = 0;
-                memset(L, 1, block_size);
-                continue;
-            }
-            // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
-            // With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
-            // boundaries that split the weights xb[i] into 3 groups. To do so, we sort the weights
-            // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
-            // Wi = sum[weight[j], j = 0...i], and use these to quckly get get the optimum scale
-            // for each possible and score for each split.
-            for (int j = 0; j < block_size; ++j) {
-                pairs[2*j] = xb[j];
-                idx[2*j] = j;
-            }
-            qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
-            {
-                sumx[0] = sumw[0] = 0;
-                for (int j = 0; j < block_size; ++j) {
-                    int i = idx[2*j];
-                    sumx[j+1] = sumx[j] + weight[i]*xb[i];
-                    sumw[j+1] = sumw[j] + weight[i];
-                }
-            }
-            float best_score = -FLT_MIN, scale = max;
-            int besti1 = -1, besti2 = -1, best_shift = 0;
-            for (int i1 = 0; i1 <= block_size; ++i1) {
-                for (int i2 = i1; i2 <= block_size; ++i2) {
-                    float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[block_size] - sumx[i2])*x_p[2];
-                    float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[block_size] - sumw[i2])*x_p[2]*x_p[2];
-                    if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
-                        scale = sumqx/sumq2; best_score = scale*sumqx;
-                        besti1 = i1; besti2 = i2; best_shift = 1;
-                    }
-                    sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[block_size] - sumx[i2])*x_m[2];
-                    sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[block_size] - sumw[i2])*x_m[2]*x_m[2];
-                    if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
-                        scale = sumqx/sumq2; best_score = scale*sumqx;
-                        besti1 = i1; besti2 = i2; best_shift = -1;
-                    }
-                }
-            }
-            GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
-            for (int j =      0; j < besti1; ++j) L[idx[2*j]] = 0;
-            for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
-            for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
-            if (scale < 0) {
-                for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
-                scale = -scale; best_shift = -best_shift;
-            }
-            bool all_on_grid = true;
-            const float * xx = best_shift == 1 ? x_p : x_m;
-            for (int k = 0; k < block_size/8; ++k) {
-                uint16_t u = 0;
-                for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
-                int grid_index = kmap_q2xs[u];
-                if (grid_index < 0) {
-                    all_on_grid = false;
-                    const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
-                    grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
-                    GGML_ASSERT(grid_index >= 0);
-                }
-                index[k] = grid_index;
-            }
-            if (!all_on_grid) {
-                float sumqx = 0, sumq2 = 0;
-                for (int k = 0; k < block_size/8; ++k) {
-                    const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
-                    for (int j = 0; j < 8; ++j) {
-                        float w = weight[8*k + j];
-                        float q = xx[(pg[j] - 1)/2];
-                        sumqx += w*q*xb[8*k+j];
-                        sumq2 += w*q*q;
-                    }
-                }
-                if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
-            }
+            int best_shift;
+            iq1s_process_1block(block_size, xb, weight, L, &scales[ib], index, &best_shift, pairs, sumx, sumw);
+
+//            float max = fabsf(xb[0]);
+//            for (int i = 1; i < block_size; ++i) max = MAX(max, fabsf(xb[i]));
+//            if (max < GROUP_MAX_EPS_IQ1_S) {
+//                scales[ib] = 0;
+//                memset(L, 1, block_size);
+//                continue;
+//            }
+//            // Here we solve exactly the sum of squared difference (SSD) weighted minimization problem.
+//            // With just 3 allowed quant values (-1, 0, 1), we can search exhaustively for the two
+//            // boundaries that split the weights xb[i] into 3 groups. To do so, we sort the weights
+//            // in ascending order, compute Si = sum[weight[j] xb[j], j = 0...i] and
+//            // Wi = sum[weight[j], j = 0...i], and use these to quckly get get the optimum scale
+//            // for each possible and score for each split.
+//            for (int j = 0; j < block_size; ++j) {
+//                pairs[2*j] = xb[j];
+//                idx[2*j] = j;
+//            }
+//            qsort(pairs, block_size, 2*sizeof(float), iq1_sort_helper);
+//            {
+//                sumx[0] = sumw[0] = 0;
+//                for (int j = 0; j < block_size; ++j) {
+//                    int i = idx[2*j];
+//                    sumx[j+1] = sumx[j] + weight[i]*xb[i];
+//                    sumw[j+1] = sumw[j] + weight[i];
+//                }
+//            }
+//            float best_score = -FLT_MIN, scale = max;
+//            int besti1 = -1, besti2 = -1, best_shift = 0;
+//            for (int i1 = 0; i1 <= block_size; ++i1) {
+//                for (int i2 = i1; i2 <= block_size; ++i2) {
+//                    float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[block_size] - sumx[i2])*x_p[2];
+//                    float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[block_size] - sumw[i2])*x_p[2]*x_p[2];
+//                    if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
+//                        scale = sumqx/sumq2; best_score = scale*sumqx;
+//                        besti1 = i1; besti2 = i2; best_shift = 1;
+//                    }
+//                    sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[block_size] - sumx[i2])*x_m[2];
+//                    sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[block_size] - sumw[i2])*x_m[2]*x_m[2];
+//                    if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
+//                        scale = sumqx/sumq2; best_score = scale*sumqx;
+//                        besti1 = i1; besti2 = i2; best_shift = -1;
+//                    }
+//                }
+//            }
+//            GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
+//            for (int j =      0; j < besti1; ++j) L[idx[2*j]] = 0;
+//            for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
+//            for (int j = besti2; j < block_size; ++j) L[idx[2*j]] = 2;
+//            if (scale < 0) {
+//                for (int j = 0; j < block_size; ++j) L[j] = 2 - L[j];
+//                scale = -scale; best_shift = -best_shift;
+//            }
+//            bool all_on_grid = true;
+//            const float * xx = best_shift == 1 ? x_p : x_m;
+//            for (int k = 0; k < block_size/8; ++k) {
+//                uint16_t u = 0;
+//                for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
+//                int grid_index = kmap_q2xs[u];
+//                if (grid_index < 0) {
+//                    all_on_grid = false;
+//                    const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
+//                    grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
+//                    GGML_ASSERT(grid_index >= 0);
+//                }
+//                index[k] = grid_index;
+//            }
+//            if (!all_on_grid) {
+//                float sumqx = 0, sumq2 = 0;
+//                for (int k = 0; k < block_size/8; ++k) {
+//                    const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
+//                    for (int j = 0; j < 8; ++j) {
+//                        float w = weight[8*k + j];
+//                        float q = xx[(pg[j] - 1)/2];
+//                        sumqx += w*q*xb[8*k+j];
+//                        sumq2 += w*q*q;
+//                    }
+//                }
+//                if (sumqx > 0 && sumq2 > 0) scale = sumqx/sumq2;
+//            }
             uint16_t h = 0;
             for (int k = 0; k < block_size/8; ++k) {
                 y[ibl].qs[(block_size/8)*ib + k] = index[k] & 255;
                 h |= (index[k] >> 8) << 3*k;
             }
             y[ibl].qh[ib] = h;
-            GGML_ASSERT(scale >= 0);
-            scales[ib] = scale;
+            GGML_ASSERT(scales[ib] >= 0);
+            max_scale = MAX(max_scale, scales[ib]);
+            //GGML_ASSERT(scale >= 0);
+            //scales[ib] = scale;
             shifts[ib] = best_shift;
-            max_scale = MAX(max_scale, scale);
+            //max_scale = MAX(max_scale, scale);
         }
 
         if (!max_scale) {
@@ -14171,6 +14274,19 @@ size_t quantize_iq1_s(const float * restrict src, void * restrict dst, int64_t n
     return nrow * nblock * sizeof(block_iq1_s);
 }
 
+void quantize_row_iq1_s_ref  (const float * GGML_RESTRICT x, block_iq1_s   * GGML_RESTRICT y, int64_t k) {
+    int nblock = k/QK_K;
+    float qw[QK_K];
+    for (int j = 0; j < QK_K; ++j) qw[j] = 1;
+    for (int ibl = 0; ibl < nblock; ++ibl) {
+        quantize_iq1_s(x + ibl*QK_K, &y[ibl], 1, QK_K, qw);
+    }
+}
+
+void quantize_row_iq1_s  (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) {
+    quantize_row_iq1_s_ref(x, (block_iq1_s *)y, k);
+}
+
 static void quantize_row_iq1_m_impl(const float * restrict x, void * restrict vy, int64_t n, const float * restrict quant_weights,
         float    * scales,
         float    * weight,
@@ -15129,6 +15245,7 @@ bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbyte
         case GGML_TYPE_IQ3_XXS_R4: break;
         case GGML_TYPE_IQ3_S_R4: break;
         case GGML_TYPE_IQ2_S_R4: break;
+        case GGML_TYPE_IQ1_S_R4: break;
         case GGML_TYPE_Q4_0_R4: break;
         case GGML_TYPE_Q5_0_R4: break;
         case GGML_TYPE_Q6_0_R4: break;