summary | refs | log | tree | commit | diff
path: root/ggml/src/iqk/iqk_quantize.cpp
diff options
context:
space:
mode:
author Kawrakow <iwankawrakow@gmail.com> 2024-10-04 16:16:01 +0300
committer GitHub <noreply@github.com> 2024-10-04 16:16:01 +0300
commit fe36930c8b7fdf7a6710f7363a9a9f94c2fef9c0 (patch)
tree 3d98d1f34823bd843c0340aa8e3932a120a24ad2 /ggml/src/iqk/iqk_quantize.cpp
parent bc79091b0e8602a8d292c22fba0d4072456d52d0 (diff)
Move scale fudge factors to quantization (#81)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src/iqk/iqk_quantize.cpp')
-rw-r--r-- ggml/src/iqk/iqk_quantize.cpp | 6
1 file changed, 2 insertions, 4 deletions
diff --git a/ggml/src/iqk/iqk_quantize.cpp b/ggml/src/iqk/iqk_quantize.cpp
index 28bad18e..3ff6b4da 100644
--- a/ggml/src/iqk/iqk_quantize.cpp
+++ b/ggml/src/iqk/iqk_quantize.cpp
@@ -589,7 +589,6 @@ void quantize_row_iq2_k_impl(const float * x, void * vy, int n_per_row, const fl
if (!max_abs_scale) continue;
float d = max_abs_scale/15;
- y[ibl].d = GGML_FP32_TO_FP16(d);
y[ibl].extra = extra;
float id = 1/d;
@@ -624,7 +623,7 @@ void quantize_row_iq2_k_impl(const float * x, void * vy, int n_per_row, const fl
}
}
}
- if (sumq2 > 0) y[ibl].d = GGML_FP32_TO_FP16(sumqx/sumq2);
+ y[ibl].d = GGML_FP32_TO_FP16(1.025f*(sumq2 > 0 ? sumqx/sumq2 : d));
}
}
@@ -854,7 +853,6 @@ static void quantize_row_iq3_k_impl(const float * x, void * vy, int n_per_row, c
if (!max_abs_scale) continue;
float d = max_abs_scale/31;
- y[ibl].d = GGML_FP32_TO_FP16(d);
y[ibl].extra = extra;
float id = 1/d;
@@ -892,7 +890,7 @@ static void quantize_row_iq3_k_impl(const float * x, void * vy, int n_per_row, c
}
}
}
- if (sumq2 > 0) y[ibl].d = GGML_FP32_TO_FP16(sumqx/sumq2);
+ y[ibl].d = GGML_FP32_TO_FP16(1.01f*(sumq2 > 0 ? sumqx/sumq2 : d));
}
}