q2_K: allow it to detect ternary nets and quantize accordingly

author: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-08-05 11:59:36 +0300
committer: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2024-08-05 11:39:10 +0200
commit: b409c153636d27473970abd3a9c9400b6287d400 (patch)
tree: 7bdba4859b8a66fa39ec237b87db56399edacebb /ggml/src
parent: c11c7c8cae5ab1abf41c16b7bb27439bb0983c54 (diff)
1 files changed, 45 insertions, 0 deletions
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index c2c66f38..415249fb 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -1995,7 +1995,52 @@ void quantize_row_q2_K_ref(const float * restrict x, block_q2_K * restrict y, in
 
     const float q4scale = 15.f;
 
+    // Detect TriNet
+    {
+        int n = k;
+        float max = 0;
+        for (int j = 0; j < n; ++j) {
+            float ax = fabsf(x[j]);
+            max = MAX(max, ax);
+        }
+        float mse0 = 0, mse = 0;
+        for (int j = 0; j < n; ++j) {
+            int l = x[j] < -0.5f*max ? -1 : x[j] < 0.5f*max ? 0 : 1;
+            mse0 += x[j]*x[j];
+            float diff = x[j] - max*l;
+            mse += diff*diff;
+        }
+        if (mse < 0.1f*mse0) {
+            // yes, most likely trinet
+            for (int ibl = 0; ibl < nb; ++ibl) {
+                y[ibl].d = GGML_FP32_TO_FP16(max);
+                y[ibl].dmin = GGML_FP32_TO_FP16(max);
+                for (int ib = 0; ib < QK_K/16; ++ib) y[ibl].scales[ib] = 1 | (1 << 4);
+                const float * xb = x + QK_K * ibl;
+                for (int j = 0; j < QK_K; ++j) {
+                    L[j] = xb[j] < -0.5f*max ? 0 : xb[j] < 0.5f*max ? 1 : 2;
+                }
+                uint8_t * qs = y[ibl].qs;
+                for (int j = 0; j < QK_K; j += 128) {
+                    for (int l = 0; l < 32; ++l) {
+                        qs[l] = L[j + l] | (L[j + l + 32] << 2) | (L[j + l + 64] << 4) | (L[j + l + 96] << 6);
+                    }
+                    qs += 32;
+                }
+            }
+            return;
+        }
+    }
+
     for (int i = 0; i < nb; i++) {
+        //{
+        //    float max = x[0], min = x[0];
+        //    for (int j = 1; j < 256; ++j) {
+        //        max = MAX(x[j], max);
+        //        min = MIN(x[j], min);
+        //    }
+        //    printf("%s: max = %g, min = %g\n", __func__, (double)max, (double)min);
+        //}
         float max_scale = 0; // as we are deducting the min, scales are always positive
         float max_min = 0;
         for (int j = 0; j < QK_K/16; ++j) {
author	Iwan Kawrakow <iwan.kawrakow@gmail.com>	2024-08-05 11:59:36 +0300
committer	Kawrakow <48489457+ikawrakow@users.noreply.github.com>	2024-08-05 11:39:10 +0200
commit	b409c153636d27473970abd3a9c9400b6287d400 (patch)
tree	7bdba4859b8a66fa39ec237b87db56399edacebb /ggml/src
parent	c11c7c8cae5ab1abf41c16b7bb27439bb0983c54 (diff)