summary | refs | log | tree | commit | diff
path: root/ggml-cuda.cu
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2024-05-11 10:32:41 +0300
committer: GitHub <noreply@github.com> 2024-05-11 10:32:41 +0300
commit: 9cb317f77e53067f7a138cc89ef7657148eae8e6 (patch)
tree: 3ba1d2d80d1d7c8b4ab01f6396a3febaae26e91b /ggml-cuda.cu
parent: e849648888a11de13aaaa4cb2eda3f5a9c7b444d (diff)
ggml : full ALiBi support (#7192)
* ggml : full ALiBi support
* ggml : update ggml_soft_max_ext() CUDA, SYCL
* ggml : ggml_flash_attn_ext() support ALiBi (CPU)
* ggml : ggml_flash_attn_ext() support ALiBi (Metal)
* ggml : fix warning
* ggml : ggml_flash_attn_ext() support ALiBi (CUDA)
ggml-ci
* ggml : fix assert message
* vulkan : add dev notes
* ggml : require mask when using ALiBi
ggml-ci
* convert : fix convert for refact models
Diffstat (limited to 'ggml-cuda.cu')
-rw-r--r-- ggml-cuda.cu | 5
1 files changed, 0 insertions, 5 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 6f89a7cc..c5c77879 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -4,7 +4,6 @@
#include "ggml-cuda/common.cuh"
#include "ggml-cuda/acc.cuh"
-#include "ggml-cuda/alibi.cuh"
#include "ggml-cuda/arange.cuh"
#include "ggml-cuda/argsort.cuh"
#include "ggml-cuda/binbcast.cuh"
@@ -2277,9 +2276,6 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
case GGML_OP_ROPE:
ggml_cuda_op_rope(ctx, dst);
break;
- case GGML_OP_ALIBI:
- ggml_cuda_op_alibi(ctx, dst);
- break;
case GGML_OP_IM2COL:
ggml_cuda_op_im2col(ctx, dst);
break;
@@ -2829,7 +2825,6 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
case GGML_OP_DIAG_MASK_INF:
case GGML_OP_SOFT_MAX:
case GGML_OP_ROPE:
- case GGML_OP_ALIBI:
case GGML_OP_IM2COL:
case GGML_OP_POOL_2D:
case GGML_OP_SUM_ROWS: