From a743d76a01f23038b2c85af1e9048ee836767b44 Mon Sep 17 00:00:00 2001
From: Johannes Gäßler
Date: Thu, 9 May 2024 14:32:02 +0200
Subject: CUDA: generalize FP16 fattn vec kernel (#7061)

* CUDA: generalize FP16 fattn vec kernel

* disable unsupported head sizes for AMD in test

* try AMD fix

* fix batch size 2-8

* partially revert changes
---
 llama.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 806c2093..7572f8d5 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15519,13 +15519,6 @@ struct llama_context * llama_new_context_with_model(
         cparams.flash_attn = false;
     }
 
-#ifdef GGML_USE_HIPBLAS
-    if (cparams.flash_attn) {
-        LLAMA_LOG_WARN("%s: flash_attn is not yet compatible with HIPBLAS builds - forcing off\n", __func__);
-        cparams.flash_attn = false;
-    }
-#endif
-
     if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
    }
-- 
cgit v1.2.3
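
With the HIPBLAS guard removed, AMD builds can now request flash attention through the
regular context parameters instead of having it forced off. A minimal sketch of enabling
it via the public llama.h C API of this vintage; "model.gguf" and the layer count are
placeholders, not values from this patch:

    #include <stdio.h>
    #include "llama.h"

    int main(void) {
        llama_backend_init();

        // Load a model; "model.gguf" is a hypothetical path for illustration.
        struct llama_model_params mparams = llama_model_default_params();
        mparams.n_gpu_layers = 99; // offload all layers so the fattn kernels run on the GPU
        struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        if (model == NULL) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        // Request flash attention. After this patch it is no longer forced off on
        // HIPBLAS builds, though the earlier checks in llama_new_context_with_model
        // may still disable it for unsupported configurations.
        struct llama_context_params cparams = llama_context_default_params();
        cparams.flash_attn = true;
        struct llama_context * ctx = llama_new_context_with_model(model, cparams);

        // ... run inference ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_end();
        return 0;
    }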