From a743d76a01f23038b2c85af1e9048ee836767b44 Mon Sep 17 00:00:00 2001
From: Johannes Gäßler
Date: Thu, 9 May 2024 14:32:02 +0200
Subject: CUDA: generalize FP16 fattn vec kernel (#7061)

* CUDA: generalize FP16 fattn vec kernel

* disable unsupported head sizes for AMD in test

* try AMD fix

* fix batch size 2-8

* partially revert changes
---
 llama.cpp | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/llama.cpp b/llama.cpp
index 806c2093..7572f8d5 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15519,13 +15519,6 @@ struct llama_context * llama_new_context_with_model(
         cparams.flash_attn = false;
     }
 
-#ifdef GGML_USE_HIPBLAS
-    if (cparams.flash_attn) {
-        LLAMA_LOG_WARN("%s: flash_attn is not yet compatible with HIPBLAS builds - forcing off\n", __func__);
-        cparams.flash_attn = false;
-    }
-#endif
-
     if (params.seed == LLAMA_DEFAULT_SEED) {
         params.seed = time(NULL);
    }
-- 
cgit v1.2.3
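
With the HIPBLAS guard removed, AMD builds can now request flash attention through the
regular context parameters instead of having it forced off. A minimal sketch of enabling
it via the public llama.h C API of this vintage; "model.gguf" and the layer count are
placeholders, not values from this patch:

    #include <stdio.h>
    #include "llama.h"

    int main(void) {
        llama_backend_init();

        // Load a model; "model.gguf" is a hypothetical path for illustration.
        struct llama_model_params mparams = llama_model_default_params();
        mparams.n_gpu_layers = 99; // offload all layers so the fattn kernels run on the GPU
        struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
        if (model == NULL) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        // Request flash attention. After this patch it is no longer forced off on
        // HIPBLAS builds, though the earlier checks in llama_new_context_with_model
        // may still disable it for unsupported configurations.
        struct llama_context_params cparams = llama_context_default_params();
        cparams.flash_attn = true;
        struct llama_context * ctx = llama_new_context_with_model(model, cparams);

        // ... run inference ...

        llama_free(ctx);
        llama_free_model(model);
        llama_backend_end();
        return 0;
    }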