author     Georgi Gerganov <ggerganov@gmail.com>    2024-05-23 10:00:44 +0300
committer  GitHub <noreply@github.com>              2024-05-23 10:00:44 +0300
commit     d48c88cbd563b6cf0ce972e2f56796896e240736 (patch)
tree       14574b8ddc14145b093f6ab531f1e2968bbd6d85 /examples/finetune/finetune.cpp
parent     e84b71c2c6da6e69c8f815168ea836f9716a325e (diff)
ggml : remove ggml_flash_attn and ggml_flash_ff (#7463)
ggml-ci
Diffstat (limited to 'examples/finetune/finetune.cpp')
-rw-r--r--  examples/finetune/finetune.cpp | 3
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 992426c1..22425730 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -643,7 +643,8 @@ static struct ggml_tensor * llama_build_lora_finetune_graphs(
     struct ggml_tensor * t15 = ggml_permute (ctx, t12, 0, 3, 1, 2); set_name(t15, "t15"); assert_shape_4d(t15, N, n_embd_head, n_head_kv, n_batch);
     struct ggml_tensor * t16;
     if (enable_flash_attn) {
-        t16 = ggml_flash_attn(ctx, t13, t14, t15, true); set_name(t16, "t16"); assert_shape_4d(t16, n_embd_head, N, n_head, n_batch);
+        GGML_ASSERT(false && "TODO: ggml_flash_attn_ext() not yet supported");
+        //t16 = ggml_flash_attn(ctx, t13, t14, t15, true); set_name(t16, "t16"); assert_shape_4d(t16, n_embd_head, N, n_head, n_batch);
     } else {
         struct ggml_tensor * t16_0 = ggml_mul_mat (ctx, t14, t13); set_name(t16_0, "t16_0"); assert_shape_4d(t16_0, N, N, n_head, n_batch);
         struct ggml_tensor * t16_1 = ggml_scale_inplace (ctx, t16_0, kv_scale); set_name(t16_1, "t16_1"); assert_shape_4d(t16_1, N, N, n_head, n_batch);
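
For context on why the branch asserts rather than being ported directly: the removed ggml_flash_attn() took an already-transposed V (the t15 permute above exists for exactly that), while the replacement op ggml_flash_attn_ext() expects V non-transposed, takes an explicit mask, and returns a permuted result, so the surrounding graph and shape checks would need rework too. Below is a minimal sketch of how the new op might be called, assuming the signature of this era (ctx, q, k, v, mask, scale, max_bias); the kq_mask tensor, the wrapper function, and the scale/bias values are illustrative assumptions, not part of this commit.

    #include <math.h>
    #include "ggml.h"

    // Hypothetical sketch only -- NOT part of commit d48c88c.
    // Roughly how the disabled flash-attention branch might use the new op.
    static struct ggml_tensor * flash_attn_branch(
            struct ggml_context * ctx,
            struct ggml_tensor  * q,        // t13 in the graph above
            struct ggml_tensor  * k,        // t14 in the graph above
            struct ggml_tensor  * v,        // V, *not* transposed (unlike t15)
            struct ggml_tensor  * kq_mask,  // assumed causal mask tensor
            int                   n_embd_head) {
        // scale = 1/sqrt(head dim), max_bias = 0 (no ALiBi) -- both assumptions
        return ggml_flash_attn_ext(ctx, q, k, v, kq_mask,
                                   1.0f/sqrtf((float) n_embd_head), 0.0f);
    }

Because the result layout of ggml_flash_attn_ext() differs from the old op, the assert_shape_4d() checks and the downstream permutes would also have to change before the GGML_ASSERT could be lifted.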