summary | refs | log | tree | commit | diff
path: root/ggml/src
diff options
context:
space:
mode:
author Kawrakow <iwankawrakow@gmail.com> 2024-10-01 15:52:35 +0300
committer GitHub <noreply@github.com> 2024-10-01 15:52:35 +0300
commit 0999f77e5b1a97164ee0218f5fc118fe1649b0a3 (patch)
tree f27d4b2adab218a4e0b91a26b06ebc249a567b2c /ggml/src
parent 970df4b46701074cf907dcbfa0cf2feab972ed0d (diff)
Fix Q5_0 flash attention (#75)
When I changed iqk_mul_mat to use type-1 dot products for type-0 legacy quants, I forgot to also change the vec_dot_type when the dot product is done via ggml as in flash attention. This commit fixes it.

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src')
-rw-r--r-- ggml/src/ggml-quants.c 7
1 files changed, 6 insertions, 1 deletions
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
index b0e70bcc..d6b1dc0a 100644
--- a/ggml/src/ggml-quants.c
+++ b/ggml/src/ggml-quants.c
@@ -4641,7 +4641,12 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
void ggml_vec_dot_q5_0_q8_0(int n, float * restrict s, size_t bs, const void * restrict vx, size_t bx, const void * restrict vy, size_t by, int nrc) {
#if GGML_USE_IQK_MULMAT
- if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q5_0, vx, bx, GGML_TYPE_Q8_0, vy, by, s, bs, 0, 1)) {
+#ifdef __AVX2__
+ const enum ggml_type vec_dot_type = GGML_TYPE_Q8_1;
+#else
+ const enum ggml_type vec_dot_type = GGML_TYPE_Q8_0;
+#endif
+ if (iqk_mul_mat(nrc, nrc, n, GGML_TYPE_Q5_0, vx, bx, vec_dot_type, vy, by, s, bs, 0, 1)) {
return;
}
#endif