summary | refs | log | tree | commit | diff
path: root/ggml.h
diff options
context:
space:
mode:
author: slaren <slarengh@gmail.com> 2024-04-18 15:18:48 +0200
committer: GitHub <noreply@github.com> 2024-04-18 15:18:48 +0200
commit: 0d56246f4b9764158525d894b96606f6163c53a8 (patch)
tree: 43e57dfbbde67b701020fc3e2ac885e846925d26 /ggml.h
parent: 03c0946d73c63ea73e1d85015b7088298443d438 (diff)
ggml : group all experts in a single ggml_mul_mat_id (#6505)
* ggml : group all experts in a single ggml_mul_mat_id
  cuda : improve mmid row copy
* cuda : fix bin bcast with non-cont src0
* test-backend-ops : only run all mul mat tests for base types
* llama : disable moe offloading with SYCL
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml.h')
-rw-r--r-- ggml.h 6
1 files changed, 2 insertions, 4 deletions
diff --git a/ggml.h b/ggml.h
index e9ed8eee..4d1d77fe 100644
--- a/ggml.h
+++ b/ggml.h
@@ -1161,13 +1161,11 @@ extern "C" {
enum ggml_prec prec);
// indirect matrix multiplication
- // ggml_mul_mat_id(ctx, as, ids, id, b) ~= ggml_mul_mat(as[ids[id]], b)
GGML_API struct ggml_tensor * ggml_mul_mat_id(
struct ggml_context * ctx,
struct ggml_tensor * as,
- struct ggml_tensor * ids,
- int id,
- struct ggml_tensor * b);
+ struct ggml_tensor * b,
+ struct ggml_tensor * ids);
// A: m columns, n rows,
// B: p columns, n rows,