Diffstat (limited to 'src/llama.cpp')
-rw-r--r--  src/llama.cpp | 48
1 file changed, 23 insertions(+), 25 deletions(-)
diff --git a/src/llama.cpp b/src/llama.cpp
index 11a7060c..a1821d2d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -9870,6 +9870,28 @@ llm_expert_gating_func_type gating_op,
cb(cur, "ffn_moe_weighted", il);
}
+#ifdef GGML_USE_VULKAN
+ // aggregate experts
+ ggml_tensor * moe_out = nullptr;
+ //ggml_tensor * first_expert = nullptr;
+ for (int i = 0; i < n_expert_used; ++i) {
+ ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
+ experts->nb[2], i*experts->nb[1]);
+
+ if (i == 0) {
+ moe_out = cur_expert;
+ } else {
+ moe_out = ggml_add(ctx, moe_out, cur_expert);
+ }
+ }
+
+ if (n_expert_used == 1) {
+ // avoid returning a non-contiguous tensor
+ moe_out = ggml_cont(ctx, moe_out);
+ }
+
+ return moe_out;
+#else
if (n_expert_used == 1) {
return ggml_cont(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0));
}
@@ -9878,32 +9900,8 @@ llm_expert_gating_func_type gating_op,
ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], experts->nb[1]));
}
return ggml_multi_add(ctx, ggml_view_2d(ctx, experts, n_embd, n_tokens, experts->nb[2], 0), n_expert_used);
+#endif
- //// aggregate experts
- //ggml_tensor * moe_out = nullptr;
- ////ggml_tensor * first_expert = nullptr;
- //for (int i = 0; i < n_expert_used; ++i) {
- // ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
- // experts->nb[2], i*experts->nb[1]);
-
- // if (i == 0) {
- // moe_out = cur_expert;
- // //first_expert = cur_expert;
- // //printf("%s: %d: %d x %d x %d x %d | %d x %d x %d x %d\n", __func__, ggml_is_contiguous(first_expert),
- // // (int)cur_expert->ne[0], (int)cur_expert->ne[1], (int)cur_expert->ne[2], (int)cur_expert->ne[3],
- // // (int)cur_expert->nb[0], (int)cur_expert->nb[1], (int)cur_expert->nb[2], (int)cur_expert->nb[3]);
- // } else {
- // moe_out = ggml_add(ctx, moe_out, cur_expert);
- // //printf("%s: %d %d\n", __func__, ggml_is_contiguous(cur_expert), ggml_are_same_shape(cur_expert, first_expert));
- // }
- //}
-
- //if (n_expert_used == 1) {
- // // avoid returning a non-contiguous tensor
- // moe_out = ggml_cont(ctx, moe_out);
- //}
-
- //return moe_out;
}
static struct ggml_tensor * llm_build_kqv(
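
Note: the patch switches the GGML_USE_VULKAN path from the fused ggml_multi_add back to a loop of pairwise ggml_add calls (presumably because ggml_multi_add is not supported on that backend; the diff itself does not state the motivation). The added code can be read as the standalone sketch below. The wrapper function moe_sum_experts_sketch and its parameter list are illustrative only; the ggml calls and strides mirror the diff.

    // Minimal sketch, not part of the patch: sum n_expert_used expert
    // outputs stored in `experts` by taking an [n_embd, n_tokens] 2D view
    // per expert and accumulating with pairwise ggml_add.
    static ggml_tensor * moe_sum_experts_sketch(
            ggml_context * ctx,
            ggml_tensor  * experts,       // per-expert FFN outputs
            int64_t        n_embd,
            int64_t        n_tokens,
            int            n_expert_used) {
        ggml_tensor * moe_out = nullptr;
        for (int i = 0; i < n_expert_used; ++i) {
            // row stride of the view is experts->nb[2]; expert i's slice
            // starts i*experts->nb[1] bytes into the tensor, as in the diff
            ggml_tensor * cur_expert = ggml_view_2d(ctx, experts, n_embd, n_tokens,
                    experts->nb[2], i*experts->nb[1]);
            moe_out = (i == 0) ? cur_expert : ggml_add(ctx, moe_out, cur_expert);
        }
        if (n_expert_used == 1) {
            // a single view may be non-contiguous; force a contiguous copy,
            // matching the diff's "avoid returning a non-contiguous tensor"
            moe_out = ggml_cont(ctx, moe_out);
        }
        return moe_out;
    }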