summary refs log tree commit diff
diff options
context:
space:
mode:
author Iwan Kawrakow <iwan.kawrakow@gmail.com> 2025-05-11 12:22:19 +0300
committer Iwan Kawrakow <iwan.kawrakow@gmail.com> 2025-05-11 12:22:19 +0300
commit 504fb890d90ec27e5f4822b7bd772fa94d4d6aac (patch)
tree fa9aa31fe57f0e3abbc0283a66b082535966d7ad
parent 36e6e888b75ae93fb5aac212bb0e147d8379ae23 (diff)
Revert "Fix race in the CUDA DeepSeek FA kernel (#406)"
This reverts commit 36e6e888b75ae93fb5aac212bb0e147d8379ae23. I should have tested. We get NaNs.
-rw-r--r-- ggml/src/ggml-cuda/fattn-new-mma.cu 2
1 files changed, 0 insertions, 2 deletions
diff --git a/ggml/src/ggml-cuda/fattn-new-mma.cu b/ggml/src/ggml-cuda/fattn-new-mma.cu
index 8da96370..d1484451 100644
--- a/ggml/src/ggml-cuda/fattn-new-mma.cu
+++ b/ggml/src/ggml-cuda/fattn-new-mma.cu
@@ -898,8 +898,6 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
KQ_crs += __shfl_xor_sync(0xFFFFFFFF, KQ_crs, offset, WARP_SIZE);
}
- __syncthreads();
-
// Write back combined meta data:
#pragma unroll
for (int imeta = 0; imeta < nmeta; ++imeta) {