summary | refs | log | tree | commit | diff
path: root/ggml.h
diff options
context:
space:
mode:
author: Georgi Gerganov <ggerganov@gmail.com> 2024-05-11 10:32:41 +0300
committer: GitHub <noreply@github.com> 2024-05-11 10:32:41 +0300
commit: 9cb317f77e53067f7a138cc89ef7657148eae8e6 (patch)
tree: 3ba1d2d80d1d7c8b4ab01f6396a3febaae26e91b /ggml.h
parent: e849648888a11de13aaaa4cb2eda3f5a9c7b444d (diff)
ggml : full ALiBi support (#7192)
* ggml : full ALiBi support
* ggml : update ggml_soft_max_ext() CUDA, SYCL
* ggml : ggml_flash_attn_ext() support ALiBi (CPU)
* ggml : ggml_flash_attn_ext() support ALiBi (Metal)
* ggml : fix warning
* ggml : ggml_flash_attn_ext() support ALiBi (CUDA)
ggml-ci
* ggml : fix assert message
* vulkan : add dev notes
* ggml : require mask when using ALiBi
ggml-ci
* convert : fix convert for refact models
Diffstat (limited to 'ggml.h')
-rw-r--r-- ggml.h 18
1 files changed, 3 insertions, 15 deletions
diff --git a/ggml.h b/ggml.h
index fe605382..76c33283 100644
--- a/ggml.h
+++ b/ggml.h
@@ -468,7 +468,6 @@ extern "C" {
GGML_OP_SOFT_MAX_BACK,
GGML_OP_ROPE,
GGML_OP_ROPE_BACK,
- GGML_OP_ALIBI,
GGML_OP_CLAMP,
GGML_OP_CONV_TRANSPOSE_1D,
GGML_OP_IM2COL,
@@ -1428,15 +1427,13 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
- // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope))
+ // fused soft_max(a*scale + mask*(ALiBi slope))
// mask is optional
- // pos is required when max_bias > 0.0f
// max_bias = 0.0f for no ALiBi
GGML_API struct ggml_tensor * ggml_soft_max_ext(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * mask,
- struct ggml_tensor * pos,
float scale,
float max_bias);
@@ -1538,16 +1535,6 @@ extern "C" {
float xpos_base,
bool xpos_down);
- // alibi position embedding
- // in-place, returns view(a)
- GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- int n_past,
- int n_head,
- float bias_max),
- "use ggml_soft_max_ext instead (will be removed in Mar 2024)");
-
// clamp
// in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_clamp(
@@ -1744,7 +1731,8 @@ extern "C" {
struct ggml_tensor * k,
struct ggml_tensor * v,
struct ggml_tensor * mask,
- float scale);
+ float scale,
+ float max_bias);
GGML_API void ggml_flash_attn_ext_set_prec(
struct ggml_tensor * a,