| author | Kawrakow <iwankawrakow@gmail.com> | 2025-07-04 08:33:43 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-07-04 08:33:43 +0200 |
| commit | 235c989e398ef38c8c29c76017807705ddfa3a88 (patch) | |
| tree | ab37ebb8a3ae1f862da8a59e4471eb38ce289a9d /src/llama.cpp | |
| parent | 3e024de1dae45d17110a7dfe02cadea2eb111f51 (diff) | |
Vulkan: adding GGML_OP_MULTI_ADD implementation (#582)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'src/llama.cpp')
-rw-r--r-- | src/llama.cpp | 46 |
1 file changed, 23 insertions, 23 deletions
```diff
diff --git a/src/llama.cpp b/src/llama.cpp
index a1821d2d..8c16e778 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9870,28 +9870,28 @@ llm_expert_gating_func_type gating_op,
         cb(cur, "ffn_moe_weighted", il);
     }
 
-#ifdef GGML_USE_VULKAN
-    // aggregate experts
-    ggml_tensor * moe_out = nullptr;
-    //ggml_tensor * first_expert = nullptr;
-    for (int i = 0; i < n_expert_used; ++i) {
-        ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
-                experts->nb[2], i*experts->nb[1]);
-
-        if (i == 0) {
-            moe_out = cur_expert;
-        } else {
-            moe_out = ggml_add(ctx, moe_out, cur_expert);
-        }
-    }
-
-    if (n_expert_used == 1) {
-        // avoid returning a non-contiguous tensor
-        moe_out = ggml_cont(ctx, moe_out);
-    }
-
-    return moe_out;
-#else
+//#ifdef GGML_USE_VULKAN
+//    // aggregate experts
+//    ggml_tensor * moe_out = nullptr;
+//    //ggml_tensor * first_expert = nullptr;
+//    for (int i = 0; i < n_expert_used; ++i) {
+//        ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
+//                experts->nb[2], i*experts->nb[1]);
+//
+//        if (i == 0) {
+//            moe_out = cur_expert;
+//        } else {
+//            moe_out = ggml_add(ctx, moe_out, cur_expert);
+//        }
+//    }
+//
+//    if (n_expert_used == 1) {
+//        // avoid returning a non-contiguous tensor
+//        moe_out = ggml_cont(ctx, moe_out);
+//    }
+//
+//    return moe_out;
+//#else
     if (n_expert_used == 1) {
         return ggml_cont(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0));
     }
@@ -9900,7 +9900,7 @@ llm_expert_gating_func_type gating_op,
                              ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], experts->nb[1]));
     }
     return ggml_multi_add(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0), n_expert_used);
-#endif
+//#endif
 }
```
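For context: the hunk above drops the Vulkan-only aggregation, which chained one `ggml_add` per selected expert over 2D views, and lets all backends take the path that emits a single `ggml_multi_add` graph node over the contiguous expert slices, now that the Vulkan backend implements `GGML_OP_MULTI_ADD`. The sketch below is a minimal CPU reference of the reduction both paths are assumed to compute (summing `n_expert_used` slices of shape `[n_embd, n_tokens]` element-wise, as inferred from the call site). It is not ik_llama.cpp or ggml code; `multi_add_reference` is a hypothetical helper used only for illustration.

```cpp
#include <cstdio>
#include <vector>

// experts: contiguous buffer holding n_expert_used slices laid out back to back,
// each slice containing n_embd * n_tokens floats (one weighted expert output).
// Returns the element-wise sum of all slices, i.e. the aggregated MoE output.
static std::vector<float> multi_add_reference(const std::vector<float> & experts,
                                              int n_embd, int n_tokens, int n_expert_used) {
    const size_t slice = (size_t)n_embd * n_tokens;
    std::vector<float> out(slice, 0.0f);
    for (int e = 0; e < n_expert_used; ++e) {      // one pass per selected expert
        for (size_t j = 0; j < slice; ++j) {
            out[j] += experts[e*slice + j];        // element-wise accumulation
        }
    }
    return out;
}

int main() {
    // toy sizes: 4-dim embeddings, 2 tokens, 3 experts used
    const int n_embd = 4, n_tokens = 2, n_expert_used = 3;
    std::vector<float> experts((size_t)n_embd * n_tokens * n_expert_used);
    for (size_t i = 0; i < experts.size(); ++i) experts[i] = (float)i;

    const std::vector<float> out = multi_add_reference(experts, n_embd, n_tokens, n_expert_used);
    for (float v : out) printf("%g ", v);
    printf("\n");
    return 0;
}
```

With the toy values above the three slices hold 0..7, 8..15 and 16..23, so the program prints `24 27 30 33 36 39 42 45`. The old loop-of-`ggml_add` path builds `n_expert_used - 1` intermediate graph nodes for the same sum; folding it into one multi-add node is the point of exposing the op to the Vulkan backend.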