summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Kawrakow <iwankawrakow@gmail.com>, 2025-05-03 14:43:55 +0300
committer: GitHub <noreply@github.com>, 2025-05-03 14:43:55 +0300
commit: afcfa85756ec7a476ed1daf79ed4152625dd8c7c (patch)
tree: cce54ced86ecbc24f5a0725fb2392319a2c10fc6
parent: 1ea1df4b2d942ebd56efdcdfb922ec92d6dc1db7 (diff)
Trying to fix iq1_s_r4/iq1_m_r4 quantization failure (#368)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r-- ggml/src/iqk/iqk_quantize.cpp 11
1 files changed, 8 insertions, 3 deletions
diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp
index 7873d3fe..b95493eb 100644
--- a/ggml/src/iqk/iqk_quantize.cpp
+++ b/ggml/src/iqk/iqk_quantize.cpp
@@ -6572,8 +6572,8 @@ size_t quantize_iq1_s_r4(const float * src, void * dst, int64_t nrows, int64_t n
auto xb = src + k*n_per_row + kBlockSize*ibl;
float sumx2 = 0;
for (int j = 0; j < kBlockSize; ++j) sumx2 += xb[j]*xb[j];
- if (!sumx2) {
- printf("Found block with all zeros\n");
+ if (sumx2 < 1e-14f) {
+ //printf("Found block with all zeros\n");
// all zero
int ind = 1029; // this is the grid entry with all zeros
scales[4*ibl+k] = 0;
@@ -6703,13 +6703,18 @@ size_t quantize_iq1_m_r4(const float * src, void * dst, int64_t nrows, int64_t n
auto xb = src + k*n_per_row + kBlockSize*ibl;
float sumx2 = 0;
for (int j = 0; j < kBlockSize; ++j) sumx2 += xb[j]*xb[j];
- if (!sumx2) {
+ if (sumx2 < 1e-14f) {
scales[8*ibl+2*k+0] = scales[8*ibl+2*k+1] = 0;
continue;
}
float sigma2 = 1.5f*sumx2/kBlockSize;
if (imatrix) {
for (int j = 0; j < kBlockSize; ++j) weight[j] = imatrix[kBlockSize*ibl + j]*sqrt(sigma2 + xb[j]*xb[j]);
+ float sumwx = 0;
+ for (int j = 0; j < kBlockSize; ++j) sumwx += weight[j]*std::abs(xb[j]);
+ if (!sumwx) {
+ for (int j = 0; j < kBlockSize; ++j) weight[j] = sqrt(sigma2 + xb[j]*xb[j]);
+ }
} else {
for (int j = 0; j < kBlockSize; ++j) weight[j] = sqrt(sigma2 + xb[j]*xb[j]);
}