diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-05-11 10:32:41 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-11 10:32:41 +0300 |
commit | 9cb317f77e53067f7a138cc89ef7657148eae8e6 (patch) | |
tree | 3ba1d2d80d1d7c8b4ab01f6396a3febaae26e91b /ggml.h | |
parent | e849648888a11de13aaaa4cb2eda3f5a9c7b444d (diff) |
ggml : full ALiBi support (#7192)
* ggml : full ALiBi support
* ggml : update ggml_soft_max_ext() CUDA, SYCL
* ggml : ggml_flash_attn_ext() support ALiBi (CPU)
* ggml : ggml_flash_attn_ext() support ALiBi (Metal)
* ggml : fix warning
* ggml : ggml_flash_attn_ext() support ALiBi (CUDA)
ggml-ci
* ggml : fix assert message
* vulkan : add dev notes
* ggml : require mask when using ALiBi
ggml-ci
* convert : fix convert for refact models
Diffstat (limited to 'ggml.h')
-rw-r--r-- | ggml.h | 18 |
1 files changed, 3 insertions, 15 deletions
@@ -468,7 +468,6 @@ extern "C" { GGML_OP_SOFT_MAX_BACK, GGML_OP_ROPE, GGML_OP_ROPE_BACK, - GGML_OP_ALIBI, GGML_OP_CLAMP, GGML_OP_CONV_TRANSPOSE_1D, GGML_OP_IM2COL, @@ -1428,15 +1427,13 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); - // fused soft_max(a*scale + mask + pos[i]*(ALiBi slope)) + // fused soft_max(a*scale + mask*(ALiBi slope)) // mask is optional - // pos is required when max_bias > 0.0f // max_bias = 0.0f for no ALiBi GGML_API struct ggml_tensor * ggml_soft_max_ext( struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * mask, - struct ggml_tensor * pos, float scale, float max_bias); @@ -1538,16 +1535,6 @@ extern "C" { float xpos_base, bool xpos_down); - // alibi position embedding - // in-place, returns view(a) - GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_alibi( - struct ggml_context * ctx, - struct ggml_tensor * a, - int n_past, - int n_head, - float bias_max), - "use ggml_soft_max_ext instead (will be removed in Mar 2024)"); - // clamp // in-place, returns view(a) GGML_API struct ggml_tensor * ggml_clamp( @@ -1744,7 +1731,8 @@ extern "C" { struct ggml_tensor * k, struct ggml_tensor * v, struct ggml_tensor * mask, - float scale); + float scale, + float max_bias); GGML_API void ggml_flash_attn_ext_set_prec( struct ggml_tensor * a, |