From 50b5e90112766dc4de276ccb0d0abf0f9a974b84 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Wed, 2 Oct 2024 17:05:56 +0300
Subject: Fused unary(x)*y (#70)

* Adding fused y*unary(x) op
* Fused y*unary(x) op: CUDA
* Fused y*unary(x) op: dedicated CPU implementation for silu and gelu
* Fused y*unary(x) op: Metal

---------

Co-authored-by: Iwan Kawrakow
---
 src/llama.cpp | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'src/llama.cpp')

diff --git a/src/llama.cpp b/src/llama.cpp
index eb982125..9ed109c6 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -8083,6 +8083,13 @@ static struct ggml_tensor * llm_build_ffn(
         cur = tmp;
     }
 
+    if (type_gate == LLM_FFN_PAR &&
+       (type_op == LLM_FFN_SILU || type_op == LLM_FFN_RELU || (type_op == LLM_FFN_GELU && !act_scales))) {
+        cur = ggml_fused_mul_unary(ctx, cur, tmp, type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU :
+                                                  type_op == LLM_FFN_RELU ? GGML_UNARY_OP_RELU : GGML_UNARY_OP_GELU);
+    }
+    else {
+
     switch (type_op) {
         case LLM_FFN_SILU:
             {
@@ -8122,6 +8129,7 @@ static struct ggml_tensor * llm_build_ffn(
         cur = ggml_mul(ctx, cur, tmp);
         cb(cur, "ffn_gate_par", il);
     }
+    }
 
     if (down) {
         cur = llm_build_lora_mm(lctx, ctx, down, cur);
-- 
cgit v1.2.3