From 2e5f6db5de85de5cac416c93ae9ff02731498798 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Sat, 26 Oct 2024 17:40:32 +0200 Subject: Bitnet: use the fused mul-silu in the FFN network (#110) I had forgotten that build_bitnet() does not use the standard llm_build_ffn function, so the fused mul-silu didn't get used for Bitnet when I added it to llm_build_ffn. This gives us another ~1% speedup for TG-128. Co-authored-by: Iwan Kawrakow --- src/llama.cpp | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'src/llama.cpp') diff --git a/src/llama.cpp b/src/llama.cpp index 27ba5d2f..1384123a 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -13399,12 +13399,7 @@ struct llm_build_context { cb(cur, "ffn_gate", il); - - // combine this with the above scale into ggml_scaled_silu - cur = ggml_silu(ctx0, cur); - cb(cur, "ffn_silu", il); - - cur = ggml_mul(ctx0, cur, tmp); + cur = ggml_fused_mul_unary(ctx0, cur, tmp, GGML_UNARY_OP_SILU); cb(cur, "ffn_gate_par", il); cur = llm_build_norm(ctx0, cur, hparams, -- cgit v1.2.3