| author | Kawrakow <iwankawrakow@gmail.com> | 2025-07-04 08:33:43 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-04 08:33:43 +0200 |
| commit | 235c989e398ef38c8c29c76017807705ddfa3a88 (patch) | |
| tree | ab37ebb8a3ae1f862da8a59e4471eb38ce289a9d /src/llama.cpp | |
| parent | 3e024de1dae45d17110a7dfe02cadea2eb111f51 (diff) | |
Vulkan: adding GGML_OP_MULTI_ADD implementation (#582)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'src/llama.cpp')
-rw-r--r-- | src/llama.cpp | 46 |
1 file changed, 23 insertions, 23 deletions
```diff
diff --git a/src/llama.cpp b/src/llama.cpp
index a1821d2d..8c16e778 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9870,28 +9870,28 @@ llm_expert_gating_func_type gating_op,
         cb(cur, "ffn_moe_weighted", il);
     }
 
-#ifdef GGML_USE_VULKAN
-    // aggregate experts
-    ggml_tensor * moe_out = nullptr;
-    //ggml_tensor * first_expert = nullptr;
-    for (int i = 0; i < n_expert_used; ++i) {
-        ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
-                experts->nb[2], i*experts->nb[1]);
-
-        if (i == 0) {
-            moe_out = cur_expert;
-        } else {
-            moe_out = ggml_add(ctx, moe_out, cur_expert);
-        }
-    }
-
-    if (n_expert_used == 1) {
-        // avoid returning a non-contiguous tensor
-        moe_out = ggml_cont(ctx, moe_out);
-    }
-
-    return moe_out;
-#else
+//#ifdef GGML_USE_VULKAN
+//    // aggregate experts
+//    ggml_tensor * moe_out = nullptr;
+//    //ggml_tensor * first_expert = nullptr;
+//    for (int i = 0; i < n_expert_used; ++i) {
+//        ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
+//                experts->nb[2], i*experts->nb[1]);
+//
+//        if (i == 0) {
+//            moe_out = cur_expert;
+//        } else {
+//            moe_out = ggml_add(ctx, moe_out, cur_expert);
+//        }
+//    }
+//
+//    if (n_expert_used == 1) {
+//        // avoid returning a non-contiguous tensor
+//        moe_out = ggml_cont(ctx, moe_out);
+//    }
+//
+//    return moe_out;
+//#else
     if (n_expert_used == 1) {
         return ggml_cont(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0));
     }
@@ -9900,7 +9900,7 @@ llm_expert_gating_func_type gating_op,
                              ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], experts->nb[1]));
     }
     return ggml_multi_add(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0), n_expert_used);
-#endif
+//#endif
 }
```
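For context: the hunk above drops the Vulkan-only aggregation, which chained one `ggml_add` per selected expert over 2D views, and lets all backends take the path that emits a single `ggml_multi_add` graph node over the contiguous expert slices, now that the Vulkan backend implements `GGML_OP_MULTI_ADD`. The sketch below is a minimal CPU reference of the reduction both paths are assumed to compute (summing `n_expert_used` slices of shape `[n_embd, n_tokens]` element-wise, as inferred from the call site). It is not ik_llama.cpp or ggml code; `multi_add_reference` is a hypothetical helper used only for illustration.

```cpp
#include <cstdio>
#include <vector>

// experts: contiguous buffer holding n_expert_used slices laid out back to back,
// each slice containing n_embd * n_tokens floats (one weighted expert output).
// Returns the element-wise sum of all slices, i.e. the aggregated MoE output.
static std::vector<float> multi_add_reference(const std::vector<float> & experts,
                                              int n_embd, int n_tokens, int n_expert_used) {
    const size_t slice = (size_t)n_embd * n_tokens;
    std::vector<float> out(slice, 0.0f);
    for (int e = 0; e < n_expert_used; ++e) {      // one pass per selected expert
        for (size_t j = 0; j < slice; ++j) {
            out[j] += experts[e*slice + j];        // element-wise accumulation
        }
    }
    return out;
}

int main() {
    // toy sizes: 4-dim embeddings, 2 tokens, 3 experts used
    const int n_embd = 4, n_tokens = 2, n_expert_used = 3;
    std::vector<float> experts((size_t)n_embd * n_tokens * n_expert_used);
    for (size_t i = 0; i < experts.size(); ++i) experts[i] = (float)i;

    const std::vector<float> out = multi_add_reference(experts, n_embd, n_tokens, n_expert_used);
    for (float v : out) printf("%g ", v);
    printf("\n");
    return 0;
}
```

With the toy values above the three slices hold 0..7, 8..15 and 16..23, so the program prints `24 27 30 33 36 39 42 45`. The old loop-of-`ggml_add` path builds `n_expert_used - 1` intermediate graph nodes for the same sum; folding it into one multi-add node is the point of exposing the op to the Vulkan backend.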