From 0d56246f4b9764158525d894b96606f6163c53a8 Mon Sep 17 00:00:00 2001
From: slaren <slarengh@gmail.com>
Date: Thu, 18 Apr 2024 15:18:48 +0200
Subject: ggml : group all experts in a single ggml_mul_mat_id (#6505)

* ggml : group all experts in a single ggml_mul_mat_id
cuda : improve mmid row copy

* cuda : fix bin bcast with non-cont src0

* test-backend-ops : only run all mul mat tests for base types

* llama : disable moe offloading with SYCL

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
---
 ggml.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'ggml.h')

diff --git a/ggml.h b/ggml.h
index e9ed8eee..4d1d77fe 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1161,13 +1161,11 @@ extern "C" {
             enum ggml_prec       prec);
 
     // indirect matrix multiplication
-    //  ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
     GGML_API struct ggml_tensor * ggml_mul_mat_id(
             struct ggml_context * ctx,
             struct ggml_tensor  * as,
-            struct ggml_tensor  * ids,
-            int                   id,
-            struct ggml_tensor  * b);
+            struct ggml_tensor  * b,
+            struct ggml_tensor  * ids);
 
     // A: m columns, n rows,
     // B: p columns, n rows,
-- 
cgit v1.2.3