summary | refs | log | tree | commit | diff
path: root/ggml-sycl.cpp
diff options
context:
space:
mode:
author: slaren <slarengh@gmail.com>, 2024-04-18 15:18:48 +0200
committer: GitHub <noreply@github.com>, 2024-04-18 15:18:48 +0200
commit: 0d56246f4b9764158525d894b96606f6163c53a8 (patch)
tree: 43e57dfbbde67b701020fc3e2ac885e846925d26 /ggml-sycl.cpp
parent: 03c0946d73c63ea73e1d85015b7088298443d438 (diff)
ggml : group all experts in a single ggml_mul_mat_id (#6505)
* ggml : group all experts in a single ggml_mul_mat_id
  cuda : improve mmid row copy
* cuda : fix bin bcast with non-cont src0
* test-backend-ops : only run all mul mat tests for base types
* llama : disable moe offloading with SYCL

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml-sycl.cpp')
-rw-r--r-- ggml-sycl.cpp 2
1 files changed, 1 insertions, 1 deletions
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index f5bb7da8..a9b31024 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -17752,7 +17752,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
GGML_CALL static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
const int min_batch_size = 32;
- return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
+ return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS && op->op != GGML_OP_MUL_MAT_ID;
GGML_UNUSED(backend);
}