diff options
author | slaren <slarengh@gmail.com> | 2024-04-18 15:18:48 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-04-18 15:18:48 +0200 |
commit | 0d56246f4b9764158525d894b96606f6163c53a8 (patch) | |
tree | 43e57dfbbde67b701020fc3e2ac885e846925d26 /ggml-sycl.cpp | |
parent | 03c0946d73c63ea73e1d85015b7088298443d438 (diff) |
ggml : group all experts in a single ggml_mul_mat_id (#6505)
* ggml : group all experts in a single ggml_mul_mat_id
cuda : improve mmid row copy
* cuda : fix bin bcast with non-cont src0
* test-backend-ops : only run all mul mat tests for base types
* llama : disable moe offloading with SYCL
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml-sycl.cpp')
-rw-r--r-- | ggml-sycl.cpp | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index f5bb7da8..a9b31024 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -17752,7 +17752,7 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
 GGML_CALL static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
     const int min_batch_size = 32;
-    return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
+    return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS && op->op != GGML_OP_MUL_MAT_ID;
     GGML_UNUSED(backend);
 }
```