diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2024-05-22 10:24:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-22 10:24:29 +0200 |
commit | 95fb0aefab568348da159efdd370e064d1b35f97 (patch) | |
tree | f035fd7b36da2182f5c2fde82c9b439f3cbe1e12 /ggml-cuda | |
parent | 3e5faa85032ec3106a2ad831bf412be9ff139f47 (diff) |
CUDA: remove incorrect precision check (#7454)
Diffstat (limited to 'ggml-cuda')
-rw-r--r-- | ggml-cuda/fattn-tile-f32.cu | 3 |
1 files changed, 0 insertions, 3 deletions
diff --git a/ggml-cuda/fattn-tile-f32.cu b/ggml-cuda/fattn-tile-f32.cu
index 130e7cbd..54db765e 100644
--- a/ggml-cuda/fattn-tile-f32.cu
+++ b/ggml-cuda/fattn-tile-f32.cu
@@ -286,9 +286,6 @@ void ggml_cuda_flash_attn_ext_tile_f32(ggml_backend_cuda_context & ctx, ggml_ten
     const ggml_tensor * KQV = dst;
     const ggml_tensor * Q = dst->src[0];
 
-    const int32_t precision = KQV->op_params[2];
-    GGML_ASSERT(precision == GGML_PREC_DEFAULT);
-
     if (Q->ne[1] <= 16) {
         constexpr int cols_per_block = 16;
         constexpr int parallel_blocks = 4;