summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Kawrakow <iwankawrakow@gmail.com> 2025-05-07 12:06:49 +0300
committer GitHub <noreply@github.com> 2025-05-07 12:06:49 +0300
commit 8a5c0410e127bd7d5221dc1d7354b1edff2385c0 (patch)
tree fe7f5c93453e1e75fb2dd024a695ee13bee383ea
parent 090eae4d693e7d09bae2d86b612c941dbf5c9a96 (diff)
Fix DeepSeek q8_0 cache (#391)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r-- ggml/src/iqk/iqk_flash_attn.cpp 2
-rw-r--r-- ggml/src/iqk/iqk_mul_mat.cpp 2
2 files changed, 2 insertions, 2 deletions
diff --git a/ggml/src/iqk/iqk_flash_attn.cpp b/ggml/src/iqk/iqk_flash_attn.cpp
index fd0d5dd0..610f18b7 100644
--- a/ggml/src/iqk/iqk_flash_attn.cpp
+++ b/ggml/src/iqk/iqk_flash_attn.cpp
@@ -81,7 +81,7 @@ extern "C" IQK_API bool iqk_flash_attn_noalibi(int type_q, int type_mask, float
int int_type_k = int_type_k_in;
auto work_buffer = work_buffer_in;
- if (neq1 >= 8 || rk2 >= 8) {
+ if (neq1 >= 8 || (rk2 >= 8 && nek2 > 1)) {
uint64_t row_size = 0;
work_buffer = iqk_repack_k(int_type_k, Dk, nek1, nek2, nek3, stride_k, nbk2, nbk3, k, work_buffer_in, ith, nth, int_type_k, row_size);
if (int_type_k != int_type_k_in) {
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 136174f0..54792c12 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -18033,7 +18033,7 @@ bool iqk_flash_attn_impl(int int_type_k, // type of k
auto type_v = ggml_type(int_type_v);
if (Dk == 576 && Dv == 512) {
- GGML_ASSERT(type_k == type_v);
+ GGML_ASSERT(type_k == type_v || (type_k == GGML_TYPE_Q8_0_R8 && type_v == GGML_TYPE_Q8_0));
stride_q /= sizeof(float); // q stride as float
return iqk_deepseek_helper<32>(type_k, nq1, nk1, stride_q, stride_k, stride_v, stride_m, stride_qkv,
q, (const char *)k, (const char *)v, (const char *)mask, scale, softcap, qkv, M, S);