summary | refs | log | tree | commit | diff
path: root/ggml-common.h
diff options
context:
space:
mode:
author: Johannes Gäßler <johannesg@5d6.de> 2024-06-05 16:53:00 +0200
committer: GitHub <noreply@github.com> 2024-06-05 16:53:00 +0200
commit: 7d1a378b8fb266782d9248538a661405aad80768 (patch)
tree: 7ce459a4c5a85e75f75825772124aedc3bb54b7f /ggml-common.h
parent: 2b3389677a833cee0880226533a1768b1a9508d2 (diff)
CUDA: refactor mmq, dmmv, mmvq (#7716)
* CUDA: refactor mmq, dmmv, mmvq
* fix out-of-bounds write
* struct for qk, qr, qi
* fix cmake build
* mmq_type_traits
Diffstat (limited to 'ggml-common.h')
-rw-r--r-- ggml-common.h 6
1 files changed, 6 insertions, 0 deletions
diff --git a/ggml-common.h b/ggml-common.h
index 77e6bfba..e8efceb7 100644
--- a/ggml-common.h
+++ b/ggml-common.h
@@ -123,12 +123,18 @@ typedef sycl::half2 ggml_half2;
#define QI1_S (QK_K / (4*QR1_S))
#define QR1_S 8
+#define QI1_M (QK_K / (4*QR1_M))
+#define QR1_M 8
+
#define QI4_NL (QK4_NL / (4*QR4_NL))
#define QR4_NL 2
#define QI4_XS (QK_K / (4*QR4_XS))
#define QR4_XS 8
+#define QI3_S (QK_K / (4*QR3_S))
+#define QR3_S 8
+
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
#define QK4_0 32