author Kawrakow <iwankawrakow@gmail.com> 2024-10-26 17:40:32 +0200
committer GitHub <noreply@github.com> 2024-10-26 17:40:32 +0200
commit 2e5f6db5de85de5cac416c93ae9ff02731498798 (patch)
tree d791151e7ea9601b149658c99d60e6a5633e41f1
parent bd309cb782ae8a5205dd741ccb97f6103f74888a (diff)
Bitnet: use the fused mul-silu in the FFN network (#110)
I had forgotten that build_bitnet() does not use the standard llm_build_ffn function, so the fused mul-silu did not get applied to Bitnet when I added it to llm_build_ffn. This gives us another ~1% speedup for TG-128.

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
-rw-r--r--  src/llama.cpp  7
1 file changed, 1 insertion, 6 deletions
diff --git a/src/llama.cpp b/src/llama.cpp
index 27ba5d2f..1384123a 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -13399,12 +13399,7 @@ struct llm_build_context {
                 cb(cur, "ffn_gate", il);
-
-                // combine this with the above scale into ggml_scaled_silu
-                cur = ggml_silu(ctx0, cur);
-                cb(cur, "ffn_silu", il);
-
-                cur = ggml_mul(ctx0, cur, tmp);
+                cur = ggml_fused_mul_unary(ctx0, cur, tmp, GGML_UNARY_OP_SILU);
                 cb(cur, "ffn_gate_par", il);
                 cur = llm_build_norm(ctx0, cur, hparams,
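
For context, below is a minimal standalone C sketch of what the fusion buys. It is not the actual ggml kernels; the helper names (silu, mul_silu_unfused, mul_silu_fused) and the scalar buffer handling are illustrative assumptions. The unfused path, like the removed ggml_silu + ggml_mul pair, materializes silu(gate) into an intermediate buffer and then multiplies elementwise in a second pass; the fused path, like ggml_fused_mul_unary(ctx0, cur, tmp, GGML_UNARY_OP_SILU), produces the same values in a single pass, skipping the intermediate tensor and the extra memory traversal, which is presumably where the ~1% TG-128 speedup comes from.

    #include <math.h>
    #include <stdio.h>

    /* silu(x) = x * sigmoid(x) */
    static float silu(float x) { return x / (1.0f + expf(-x)); }

    /* Unfused: two passes over the data and an intermediate buffer,
     * mirroring the removed ggml_silu followed by ggml_mul. */
    static void mul_silu_unfused(const float *gate, const float *up,
                                 float *tmp, float *out, int n) {
        for (int i = 0; i < n; ++i) tmp[i] = silu(gate[i]);   /* ggml_silu */
        for (int i = 0; i < n; ++i) out[i] = tmp[i] * up[i];  /* ggml_mul  */
    }

    /* Fused: one pass, no intermediate, mirroring ggml_fused_mul_unary. */
    static void mul_silu_fused(const float *gate, const float *up,
                               float *out, int n) {
        for (int i = 0; i < n; ++i) out[i] = silu(gate[i]) * up[i];
    }

    int main(void) {
        float gate[4] = { -1.0f, 0.5f, 2.0f, -0.25f };
        float up[4]   = {  0.3f, 1.2f, -0.7f, 4.0f  };
        float tmp[4], a[4], b[4];
        mul_silu_unfused(gate, up, tmp, a, 4);
        mul_silu_fused(gate, up, b, 4);
        for (int i = 0; i < 4; ++i)
            printf("unfused %f  fused %f\n", a[i], b[i]);  /* identical values */
        return 0;
    }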