diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2024-06-05 16:53:00 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-05 16:53:00 +0200 |
commit | 7d1a378b8fb266782d9248538a661405aad80768 (patch) | |
tree | 7ce459a4c5a85e75f75825772124aedc3bb54b7f /ggml-common.h | |
parent | 2b3389677a833cee0880226533a1768b1a9508d2 (diff) |
CUDA: refactor mmq, dmmv, mmvq (#7716)
* CUDA: refactor mmq, dmmv, mmvq
* fix out-of-bounds write
* struct for qk, qr, qi
* fix cmake build
* mmq_type_traits
Diffstat (limited to 'ggml-common.h')
-rw-r--r-- | ggml-common.h | 6 |
1 files changed, 6 insertions, 0 deletions
```diff
diff --git a/ggml-common.h b/ggml-common.h
index 77e6bfba..e8efceb7 100644
--- a/ggml-common.h
+++ b/ggml-common.h
@@ -123,12 +123,18 @@ typedef sycl::half2 ggml_half2;
 #define QI1_S (QK_K / (4*QR1_S))
 #define QR1_S 8
 
+#define QI1_M (QK_K / (4*QR1_M))
+#define QR1_M 8
+
 #define QI4_NL (QK4_NL / (4*QR4_NL))
 #define QR4_NL 2
 
 #define QI4_XS (QK_K / (4*QR4_XS))
 #define QR4_XS 8
 
+#define QI3_S (QK_K / (4*QR3_S))
+#define QR3_S 8
+
 #endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
 
 #define QK4_0 32
```