From 0d56246f4b9764158525d894b96606f6163c53a8 Mon Sep 17 00:00:00 2001
From: slaren
Date: Thu, 18 Apr 2024 15:18:48 +0200
Subject: ggml : group all experts in a single ggml_mul_mat_id (#6505)

* ggml : group all experts in a single ggml_mul_mat_id
cuda : improve mmid row copy

* cuda : fix bin bcast with non-cont src0

* test-backend-ops : only run all mul mat tests for base types

* llama : disable moe offloading with SYCL

---------

Co-authored-by: Georgi Gerganov
---
 ggml.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'ggml.h')

diff --git a/ggml.h b/ggml.h
index e9ed8eee..4d1d77fe 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1161,13 +1161,11 @@ extern "C" {
             enum ggml_prec prec);
 
     // indirect matrix multiplication
-    // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
     GGML_API struct ggml_tensor * ggml_mul_mat_id(
             struct ggml_context * ctx,
             struct ggml_tensor * as,
-            struct ggml_tensor * ids,
-            int id,
-            struct ggml_tensor * b);
+            struct ggml_tensor * b,
+            struct ggml_tensor * ids);
 
     // A: m columns, n rows,
     // B: p columns, n rows,
--
cgit v1.2.3