Diffstat (limited to 'src')
-rw-r--r--  src/llama.cpp  46
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index a1821d2d..8c16e778 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9870,28 +9870,28 @@ llm_expert_gating_func_type gating_op,
cb(cur, "ffn_moe_weighted", il);
}
-#ifdef GGML_USE_VULKAN
- // aggregate experts
- ggml_tensor * moe_out = nullptr;
- //ggml_tensor * first_expert = nullptr;
- for (int i = 0; i < n_expert_used; ++i) {
- ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
- experts->nb[2], i*experts->nb[1]);
-
- if (i == 0) {
- moe_out = cur_expert;
- } else {
- moe_out = ggml_add(ctx, moe_out, cur_expert);
- }
- }
-
- if (n_expert_used == 1) {
- // avoid returning a non-contiguous tensor
- moe_out = ggml_cont(ctx, moe_out);
- }
-
- return moe_out;
-#else
+//#ifdef GGML_USE_VULKAN
+// // aggregate experts
+// ggml_tensor * moe_out = nullptr;
+// //ggml_tensor * first_expert = nullptr;
+// for (int i = 0; i < n_expert_used; ++i) {
+// ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
+// experts->nb[2], i*experts->nb[1]);
+//
+// if (i == 0) {
+// moe_out = cur_expert;
+// } else {
+// moe_out = ggml_add(ctx, moe_out, cur_expert);
+// }
+// }
+//
+// if (n_expert_used == 1) {
+// // avoid returning a non-contiguous tensor
+// moe_out = ggml_cont(ctx, moe_out);
+// }
+//
+// return moe_out;
+//#else
if (n_expert_used == 1) {
return ggml_cont(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0));
}
@@ -9900,7 +9900,7 @@ llm_expert_gating_func_type gating_op,
ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], experts->nb[1]));
}
return ggml_multi_add(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0), n_expert_used);
-#endif
+//#endif
}
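Note on the change: the commit comments out the Vulkan-only aggregation branch, so every backend now takes the same path: special-case one expert (return a contiguous copy of the first slice), and otherwise fold the weighted expert outputs into a single graph node with ggml_multi_add instead of a chain of per-expert ggml_add nodes. The sketch below contrasts the two graph-building patterns. It is a minimal illustration, not the actual function: moe_sum_loop and moe_sum_multi are hypothetical names, the experts tensor is assumed to hold one [n_embd, n_tokens] slice per used expert strided by experts->nb[1], ggml_multi_add is the fork-specific op visible in the diff, and the two-expert branch is reconstructed from the context lines of the second hunk.

    // Old Vulkan-only path: one ggml_add node per expert slice.
    static ggml_tensor * moe_sum_loop(ggml_context * ctx, ggml_tensor * experts,
            int64_t n_embd, int64_t n_tokens, int n_expert_used) {
        ggml_tensor * moe_out = nullptr;
        for (int i = 0; i < n_expert_used; ++i) {
            // view of expert i: [n_embd, n_tokens], row stride nb[2], offset i*nb[1]
            ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
                    experts->nb[2], i*experts->nb[1]);
            moe_out = (i == 0) ? cur_expert : ggml_add(ctx, moe_out, cur_expert);
        }
        if (n_expert_used == 1) {
            moe_out = ggml_cont(ctx, moe_out); // avoid returning a non-contiguous view
        }
        return moe_out;
    }

    // New unified path: handle 1 and 2 experts directly, otherwise emit a
    // single ggml_multi_add node over the n_expert_used slices.
    static ggml_tensor * moe_sum_multi(ggml_context * ctx, ggml_tensor * experts,
            int64_t n_embd, int64_t n_tokens, int n_expert_used) {
        if (n_expert_used == 1) {
            return ggml_cont(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens,
                    experts->nb[2], 0));
        }
        if (n_expert_used == 2) { // branch inferred from the surrounding context lines
            return ggml_add(ctx,
                    ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0),
                    ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], experts->nb[1]));
        }
        return ggml_multi_add(ctx,
                ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0),
                n_expert_used);
    }

The practical difference is graph shape: the loop version adds n_expert_used - 1 separate ggml_add nodes, while the multi-add version collapses the reduction into one node, which is why dropping the GGML_USE_VULKAN carve-out leaves all backends on the cheaper construction.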