Guard against all weights in a super-block being zero (#3010)

* Guard against all weights in a super-block being zero
* Also guard against extremely small weights

Closes #2982

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
author: Kawrakow <48489457+ikawrakow@users.noreply.github.com> 2023-09-05 09:55:33 +0200
committer: GitHub <noreply@github.com> 2023-09-05 09:55:33 +0200
commit: d59bd97065cd7ded6c4ecab54b1d5e0b1b11e318 (patch)
tree: 77a7ea30f2106431e8b76ca20765605b3b2eff6f
parent: 35938ee3b0c16f1fbbf240dae21e0228864b938c (diff)
1 files changed, 7 insertions, 1 deletions
diff --git a/k_quants.c b/k_quants.c
index 4accd248..8742d4ae 100644
--- a/k_quants.c
+++ b/k_quants.c
@@ -83,7 +83,7 @@ static float make_qx_quants(int n, int nmax, const float * restrict x, int8_t *
         float ax = fabsf(x[i]);
         if (ax > amax) { amax = ax; max = x[i]; }
     }
-    if (!amax) { // all zero
+    if (amax < 1e-30f) { // all zero
         for (int i = 0; i < n; ++i) {
             L[i] = 0;
         }
@@ -1086,6 +1086,12 @@ void quantize_row_q6_K_reference(const float * restrict x, block_q6_K * restrict
 
         }
 
+        if (!max_abs_scale) {
+            memset(&y[i], 0, sizeof(block_q6_K));
+            y[i].d = ggml_fp32_to_fp16(0.f);
+            continue;
+        }
+
         float iscale = -128.f/max_scale;
         y[i].d = ggml_fp32_to_fp16(1/iscale);
         for (int ib = 0; ib < QK_K/16; ++ib) {
author	Kawrakow <48489457+ikawrakow@users.noreply.github.com>	2023-09-05 09:55:33 +0200
committer	GitHub <noreply@github.com>	2023-09-05 09:55:33 +0200
commit	d59bd97065cd7ded6c4ecab54b1d5e0b1b11e318 (patch)
tree	77a7ea30f2106431e8b76ca20765605b3b2eff6f
parent	35938ee3b0c16f1fbbf240dae21e0228864b938c (diff)