author    Kawrakow <iwankawrakow@gmail.com>  2024-10-02 17:05:56 +0300
committer GitHub <noreply@github.com>  2024-10-02 17:05:56 +0300
commit    50b5e90112766dc4de276ccb0d0abf0f9a974b84 (patch)
tree      b463969276dad675ece86f4ecca1ebb001ba5019 /src/llama.cpp
parent    cce49832c1b81b4e535e78ff308417ef3a386b18 (diff)
Fused unary(x)*y (#70)
* Adding fused y*unary(x) op
* Fused y*unary(x) op: CUDA
* Fused y*unary(x) op: dedicated CPU implementation for silu and gelu
* Fused y*unary(x) op: Metal

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'src/llama.cpp')
-rw-r--r--  src/llama.cpp | 8 ++++++++
1 file changed, 8 insertions(+), 0 deletions(-)
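For context, the patch collapses the FFN's separate unary-activation node and element-wise gate multiply into one graph node when a parallel gate is used and the activation is SILU, RELU, or GELU (without activation scales). Below is a minimal before/after sketch of the graph construction, not part of the patch itself; it assumes a ggml context `ctx` and the `cur`/`tmp` tensors from llm_build_ffn, and takes the ggml_fused_mul_unary signature from the diff that follows.

```c
// Unfused path: two graph nodes (two kernel launches at eval time).
static struct ggml_tensor * gated_silu_unfused(struct ggml_context * ctx,
                                               struct ggml_tensor * cur,
                                               struct ggml_tensor * tmp) {
    cur = ggml_silu(ctx, cur);      // unary(x)
    return ggml_mul(ctx, cur, tmp); // unary(x) * y
}

// Fused path introduced by this commit: one node computing unary(x) * y.
static struct ggml_tensor * gated_silu_fused(struct ggml_context * ctx,
                                             struct ggml_tensor * cur,
                                             struct ggml_tensor * tmp) {
    return ggml_fused_mul_unary(ctx, cur, tmp, GGML_UNARY_OP_SILU);
}
```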
diff --git a/src/llama.cpp b/src/llama.cpp
index eb982125..9ed109c6 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -8083,6 +8083,13 @@ static struct ggml_tensor * llm_build_ffn(
cur = tmp;
}
+ if (type_gate == LLM_FFN_PAR &&
+ (type_op == LLM_FFN_SILU || type_op == LLM_FFN_RELU || (type_op == LLM_FFN_GELU && !act_scales))) {
+ cur = ggml_fused_mul_unary(ctx, cur, tmp, type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU :
+ type_op == LLM_FFN_RELU ? GGML_UNARY_OP_RELU : GGML_UNARY_OP_GELU);
+ }
+ else {
+
switch (type_op) {
case LLM_FFN_SILU:
{
@@ -8122,6 +8129,7 @@ static struct ggml_tensor * llm_build_ffn(
cur = ggml_mul(ctx, cur, tmp);
cb(cur, "ffn_gate_par", il);
}
+ }
if (down) {
cur = llm_build_lora_mm(lctx, ctx, down, cur);