diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2024-05-11 10:32:41 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-05-11 10:32:41 +0300 |
commit | 9cb317f77e53067f7a138cc89ef7657148eae8e6 (patch) | |
tree | 3ba1d2d80d1d7c8b4ab01f6396a3febaae26e91b /ggml-kompute.cpp | |
parent | e849648888a11de13aaaa4cb2eda3f5a9c7b444d (diff) |
ggml : full ALiBi support (#7192)
* ggml : full ALiBi support
* ggml : update ggml_soft_max_ext() CUDA, SYCL
* ggml : ggml_flash_attn_ext() support ALiBi (CPU)
* ggml : ggml_flash_attn_ext() support ALiBi (Metal)
* ggml : fix warning
* ggml : ggml_flash_attn_ext() support ALiBi (CUDA)
ggml-ci
* ggml : fix assert message
* vulkan : add dev notes
* ggml : require mask when using ALiBi
ggml-ci
* convert : fix convert for refact models
Diffstat (limited to 'ggml-kompute.cpp')
-rw-r--r-- | ggml-kompute.cpp | 12 |
1 file changed, 9 insertions(+), 3 deletions(-)
diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp
index 9a469821..3f033d58 100644
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -1559,12 +1559,18 @@ static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml
             case GGML_OP_SOFT_MAX:
                 {
                     float scale;
-                    memcpy(&scale, dst->op_params, sizeof(float));
+                    float max_bias;
 
-#pragma message("TODO: add ggml_vk_soft_max() F16/F32 src1 and src2 support")
+                    memcpy(&scale,    (float *)dst->op_params + 0, sizeof(float));
+                    memcpy(&max_bias, (float *)dst->op_params + 1, sizeof(float));
+
+#pragma message("TODO: add ggml_vk_soft_max() F16 src1 support")
 #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5021")
                     GGML_ASSERT(!src1 || src1t == GGML_TYPE_F32);
-                    GGML_ASSERT(src2 == nullptr);
+
+#pragma message("TODO: add ALiBi support")
+#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/7192")
+                    GGML_ASSERT(max_bias == 0.0f);
 
                     ggml_vk_soft_max(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, ne01, ne02, ne03, scale);
                 } break;