diff options
author | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-22 11:44:00 +0300 |
---|---|---|
committer | Iwan Kawrakow <iwan.kawrakow@gmail.com> | 2024-06-22 12:02:53 +0300 |
commit | b747093582c474bfa92798d1dd17dec7b982718a (patch) | |
tree | b55fddcdac6e09427e921af630f1bd4ca7946ed9 /ggml-quants.c | |
parent | 8c936e3d6593bec82975ba93bec05f9f03bb21f3 (diff) |
bitnet: qnfs tests
Q8_0 fails because, by design, the reference quantization
differs from the vecdot quantization.
Diffstat (limited to 'ggml-quants.c')
-rw-r--r-- | ggml-quants.c | 33 |
1 file changed, 33 insertions, 0 deletions
diff --git a/ggml-quants.c b/ggml-quants.c index f1ce1345..6821af0d 100644 --- a/ggml-quants.c +++ b/ggml-quants.c @@ -3,6 +3,9 @@ #include "ggml-quants.h" #include "ggml-impl.h" +#if GGML_USE_IQK_MULMAT +#include "iqk_mul_mat.h" +#endif #include <math.h> @@ -3801,6 +3804,11 @@ static inline __m128i get_scale_shuffle(int i) { #endif void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { +#if GGML_USE_IQK_MULMAT + if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q4_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) { + return; + } +#endif const int qk = QK8_0; const int nb = n / qk; @@ -4392,6 +4400,11 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * restrict s, size_t bs, const void * r } void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { +#if GGML_USE_IQK_MULMAT + if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q4_1, vx, bx, GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) { + return; + } +#endif const int qk = QK8_1; const int nb = n / qk; @@ -4683,6 +4696,11 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r } void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { +#if GGML_USE_IQK_MULMAT + if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q5_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) { + return; + } +#endif const int qk = QK8_0; const int nb = n / qk; @@ -5043,6 +5061,11 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * r } void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { +#if GGML_USE_IQK_MULMAT + if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q5_1, vx, bx, 
GGML_TYPE_Q8_1, vy, by, s, bs, 0, 1)) { + return; + } +#endif const int qk = QK8_1; const int nb = n / qk; @@ -5422,6 +5445,11 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r } void ggml_vec_dot_q8_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { +#if GGML_USE_IQK_MULMAT + if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_Q8_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) { + return; + } +#endif const int qk = QK8_0; const int nb = n / qk; @@ -11798,6 +11826,11 @@ void ggml_vec_dot_iq1_m_q8_K (int n, float * restrict s, size_t bs, const void } void ggml_vec_dot_iq4_nl_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) { +#if GGML_USE_IQK_MULMAT + if (iqk_mul_mat(GGML_TASK_TYPE_COMPUTE, nrc, nrc, n, GGML_TYPE_IQ4_NL, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) { + return; + } +#endif assert(nrc == 1); UNUSED(nrc); UNUSED(bx); |