summary | refs | log | tree | commit | diff
path: root/ggml-quants.c
diff options
context:
space:
mode:
author: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-22 11:44:00 +0300
committer: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-22 12:02:53 +0300
commit: b747093582c474bfa92798d1dd17dec7b982718a (patch)
tree: b55fddcdac6e09427e921af630f1bd4ca7946ed9 /ggml-quants.c
parent: 8c936e3d6593bec82975ba93bec05f9f03bb21f3 (diff)
bitnet: qnfs tests
The Q8_0 test fails because, by design, the reference quantization differs from the quantization used by the vec-dot implementation.
Diffstat (limited to 'ggml-quants.c')
-rw-r--r-- ggml-quants.c 33
1 file changed, 33 insertions, 0 deletions
diff --git a/ggml-quants.c b/ggml-quants.c
index f1ce1345..6821af0d 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -3,6 +3,9 @@
#include "ggml-quants.h"
#include "ggml-impl.h"
+#if GGML_USE_IQK_MULMAT
+#include "iqk_mul_mat.h"
+#endif
#include <math.h>
@@ -3801,6 +3804,11 @@ static inline __m128i get_scale_shuffle(int i) {
#endif
void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+ if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q4_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+ return;
+ }
+#endif
const int qk = QK8_0;
const int nb = n / qk;
@@ -4392,6 +4400,11 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
}
void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+ if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q4_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) {
+ return;
+ }
+#endif
const int qk = QK8_1;
const int nb = n / qk;
@@ -4683,6 +4696,11 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
}
void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+ if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q5_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+ return;
+ }
+#endif
const int qk = QK8_0;
const int nb = n / qk;
@@ -5043,6 +5061,11 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
}
void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+ if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q5_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) {
+ return;
+ }
+#endif
const int qk = QK8_1;
const int nb = n / qk;
@@ -5422,6 +5445,11 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
}
void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+ if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q8_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+ return;
+ }
+#endif
const int qk = QK8_0;
const int nb = n / qk;
@@ -11798,6 +11826,11 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void
}
void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+ if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_IQ4_NL, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+ return;
+ }
+#endif
assert(nrc == 1);
UNUSED(nrc);
UNUSED(bx);