bitnet: qnfs tests

Q8_0 fails because, by design, the reference quantization differs from the quantization used by the vec_dot implementation.
author: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-22 11:44:00 +0300
committer: Iwan Kawrakow <iwan.kawrakow@gmail.com> 2024-06-22 12:02:53 +0300
commit: b747093582c474bfa92798d1dd17dec7b982718a (patch)
tree: b55fddcdac6e09427e921af630f1bd4ca7946ed9 /ggml-quants.c
parent: 8c936e3d6593bec82975ba93bec05f9f03bb21f3 (diff)
1 files changed, 33 insertions, 0 deletions
diff --git a/ggml-quants.c b/ggml-quants.c
index f1ce1345..6821af0d 100644
--- a/ggml-quants.c
+++ b/ggml-quants.c
@@ -3,6 +3,9 @@
 
 #include "ggml-quants.h"
 #include "ggml-impl.h"
+#if GGML_USE_IQK_MULMAT
+#include "iqk_mul_mat.h"
+#endif
 
 
 #include <math.h>
@@ -3801,6 +3804,11 @@ static inline __m128i get_scale_shuffle(int i) {
 #endif
 
 void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+    if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q4_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+        return;
+    }
+#endif
     const int qk = QK8_0;
     const int nb = n / qk;
 
@@ -4392,6 +4400,11 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 }
 
 void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+    if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q4_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) {
+        return;
+    }
+#endif
     const int qk = QK8_1;
     const int nb = n / qk;
 
@@ -4683,6 +4696,11 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
 }
 
 void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+    if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q5_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+        return;
+    }
+#endif
     const int qk = QK8_0;
     const int nb = n / qk;
 
@@ -5043,6 +5061,11 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r
 }
 
 void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+    if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q5_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) {
+        return;
+    }
+#endif
     const int qk = QK8_1;
     const int nb = n / qk;
 
@@ -5422,6 +5445,11 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
 }
 
 void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+    if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q8_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+        return;
+    }
+#endif
     const int qk = QK8_0;
     const int nb = n / qk;
 
@@ -11798,6 +11826,11 @@ void ggml_vec_dot_iq1_m_q8_K  (int n, float * restrict s, size_t bs, const void
 }
 
 void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
+#if GGML_USE_IQK_MULMAT
+    if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_IQ4_NL, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+        return;
+    }
+#endif
     assert(nrc == 1);
     UNUSED(nrc);
     UNUSED(bx);
author	Iwan Kawrakow <iwan.kawrakow@gmail.com>	2024-06-22 11:44:00 +0300
committer	Iwan Kawrakow <iwan.kawrakow@gmail.com>	2024-06-22 12:02:53 +0300
commit	b747093582c474bfa92798d1dd17dec7b982718a (patch)
tree	b55fddcdac6e09427e921af630f1bd4ca7946ed9 /ggml-quants.c
parent	8c936e3d6593bec82975ba93bec05f9f03bb21f3 (diff)