From 504fb890d90ec27e5f4822b7bd772fa94d4d6aac Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Sun, 11 May 2025 12:22:19 +0300
Subject: Revert "Fix race in the CUDA DeepSeek FA kernel (#406)"

This reverts commit 36e6e888b75ae93fb5aac212bb0e147d8379ae23.

I should have tested. We get NaNs.
---
 ggml/src/ggml-cuda/fattn-new-mma.cu | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ggml/src/ggml-cuda/fattn-new-mma.cu b/ggml/src/ggml-cuda/fattn-new-mma.cu
index 8da96370..d1484451 100644
--- a/ggml/src/ggml-cuda/fattn-new-mma.cu
+++ b/ggml/src/ggml-cuda/fattn-new-mma.cu
@@ -898,8 +898,6 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
                 KQ_crs += __shfl_xor_sync(0xFFFFFFFF, KQ_crs, offset, WARP_SIZE);
             }
 
-            __syncthreads();
-
             // Write back combined meta data:
 #pragma unroll
             for (int imeta = 0; imeta < nmeta; ++imeta) {
-- 
cgit v1.2.3