From 7d1a378b8fb266782d9248538a661405aad80768 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?=
Date: Wed, 5 Jun 2024 16:53:00 +0200
Subject: CUDA: refactor mmq, dmmv, mmvq (#7716)

* CUDA: refactor mmq, dmmv, mmvq

* fix out-of-bounds write

* struct for qk, qr, qi

* fix cmake build

* mmq_type_traits
---
 ggml-common.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'ggml-common.h')

diff --git a/ggml-common.h b/ggml-common.h
index 77e6bfba..e8efceb7 100644
--- a/ggml-common.h
+++ b/ggml-common.h
@@ -123,12 +123,18 @@ typedef sycl::half2 ggml_half2;
 #define QI1_S (QK_K / (4*QR1_S))
 #define QR1_S 8
 
+#define QI1_M (QK_K / (4*QR1_M))
+#define QR1_M 8
+
 #define QI4_NL (QK4_NL / (4*QR4_NL))
 #define QR4_NL 2
 
 #define QI4_XS (QK_K / (4*QR4_XS))
 #define QR4_XS 8
 
+#define QI3_S (QK_K / (4*QR3_S))
+#define QR3_S 8
+
 #endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
 
 #define QK4_0 32
-- 
cgit v1.2.3