summaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
authorJohannes Gäßler <johannesg@5d6.de>2024-05-09 14:32:02 +0200
committerGitHub <noreply@github.com>2024-05-09 14:32:02 +0200
commita743d76a01f23038b2c85af1e9048ee836767b44 (patch)
tree8182fc85cb9fd055bc9c8268d5d4a05bcf87f57a /llama.cpp
parentf31ec120bc36c6270e4948e6a065a7c4cfa0c404 (diff)
CUDA: generalize FP16 fattn vec kernel (#7061)
* CUDA: generalize FP16 fattn vec kernel * disable unsupported head sizes for AMD in test * try AMD fix * fix batch size 2-8 * partially revert changes
Diffstat (limited to 'llama.cpp')
-rw-r--r--llama.cpp7
1 file changed, 0 insertions, 7 deletions
diff --git a/llama.cpp b/llama.cpp
index 806c2093..7572f8d5 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -15519,13 +15519,6 @@ struct llama_context * llama_new_context_with_model(
cparams.flash_attn = false;
}
-#ifdef GGML_USE_HIPBLAS
- if (cparams.flash_attn) {
- LLAMA_LOG_WARN("%s: flash_attn is not yet compatible with HIPBLAS builds - forcing off\n", __func__);
- cparams.flash_attn = false;
- }
-#endif
-
if (params.seed == LLAMA_DEFAULT_SEED) {
params.seed = time(NULL);
}