summaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
authorJohannes Gäßler <johannesg@5d6.de>2024-05-09 14:32:02 +0200
committerGitHub <noreply@github.com>2024-05-09 14:32:02 +0200
commita743d76a01f23038b2c85af1e9048ee836767b44 (patch)
tree8182fc85cb9fd055bc9c8268d5d4a05bcf87f57a /llama.cpp
parentf31ec120bc36c6270e4948e6a065a7c4cfa0c404 (diff)
CUDA: generalize FP16 fattn vec kernel (#7061)
* CUDA: generalize FP16 fattn vec kernel * disable unsupported head sizes for AMD in test * try AMD fix * fix batch size 2-8 * partially revert changes
Diffstat (limited to 'llama.cpp')
-rw-r--r--llama.cpp7
1 file changed, 0 insertions, 7 deletions
diff --git a/llama.cpp b/llama.cpp
index 806c2093..7572f8d5 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15519,13 +15519,6 @@ struct llama_context * llama_new_context_with_model(
cparams.flash_attn = false;
}
-#ifdef GGML_USE_HIPBLAS
- if (cparams.flash_attn) {
- LLAMA_LOG_WARN("%s: flash_attn is not yet compatible with HIPBLAS builds - forcing off\n", __func__);
- cparams.flash_attn = false;
- }
-#endif
-
if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL);
}