diff options
author | slaren <slarengh@gmail.com> | 2024-06-13 03:11:35 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-13 03:11:35 +0200 |
commit | f578b86b2123d0f92afbaa98a031df4d4464e582 (patch) | |
tree | 2a21feec089e5fcaa6f9d34be5468a17c3a5ddc7 /ggml-sycl.cpp | |
parent | 1c641e6aac5c18b964e7b32d9dbbb4bf5301d0d7 (diff) |
move BLAS to a separate backend (#6210)
* move BLAS to a separate backend
* rename GGML_USE_OPENBLAS to GGML_USE_BLAS
* alloc : reuse same buffer when the same buffer type is used multiple times
* set number of threads automatically for openblas and blis
* sched : print assignments when GGML_SCHED_DEBUG env variable is set
* sched : allow ops with weights on an incompatible buffer type
This will cause the weight to be copied to a backend that supports the
op, which is very costly. The weight should have been stored in a buffer
of a backend that can run the op, but llama.cpp cannot do this
automatically at the moment.
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml-sycl.cpp')
-rw-r--r-- | ggml-sycl.cpp | 28 |
1 file changed, 10 insertions, 18 deletions
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index e7d260bd..6f41ed27 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -16575,22 +16575,12 @@ GGML_CALL static size_t ggml_backend_sycl_buffer_type_get_alloc_size(ggml_backen
     UNUSED(buft);
 }
 
-GGML_CALL static bool ggml_backend_sycl_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
-    if (!ggml_backend_is_sycl(backend)) {
-        return false;
-    }
-    ggml_backend_sycl_buffer_type_context * buft_ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
-    ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
-    return buft_ctx->device == sycl_ctx->device;
-}
-
 static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
     /* .get_name = */ ggml_backend_sycl_buffer_type_name,
     /* .alloc_buffer = */ ggml_backend_sycl_buffer_type_alloc_buffer,
     /* .get_alignment = */ ggml_backend_sycl_buffer_type_get_alignment,
     /* .get_max_size = */ ggml_backend_sycl_buffer_type_get_max_size,
     /* .get_alloc_size = */ ggml_backend_sycl_buffer_type_get_alloc_size,
-    /* .supports_backend = */ ggml_backend_sycl_buffer_type_supports_backend,
     /* .is_host = */ nullptr,
 };
 
@@ -16942,12 +16932,6 @@ GGML_CALL static size_t ggml_backend_sycl_split_buffer_type_get_alloc_size(ggml_
     return total_size;
 }
 
-GGML_CALL static bool ggml_backend_sycl_split_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
-    return ggml_backend_is_sycl(backend);
-
-    UNUSED(buft);
-}
-
 GGML_CALL static bool ggml_backend_sycl_split_buffer_type_is_host(ggml_backend_buffer_type_t buft) {
     return false;
 
@@ -16960,7 +16944,6 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
     /* .get_alignment = */ ggml_backend_sycl_split_buffer_type_get_alignment,
     /* .get_max_size = */ NULL, // defaults to SIZE_MAX
     /* .get_alloc_size = */ ggml_backend_sycl_split_buffer_type_get_alloc_size,
-    /* .supports_backend = */ ggml_backend_sycl_split_buffer_type_supports_backend,
     /* .is_host = */ ggml_backend_sycl_split_buffer_type_is_host,
 };
 
@@ -17046,7 +17029,6 @@ ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type() {
             /* .get_alignment = */ ggml_backend_cpu_buffer_type()->iface.get_alignment,
             /* .get_max_size = */ NULL, // TODO: return device.maxBufferLength
             /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size,
-            /* .supports_backend = */ ggml_backend_cpu_buffer_type()->iface.supports_backend,
             /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host,
         },
         /* .context = */ nullptr,
@@ -17311,6 +17293,14 @@ GGML_CALL static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const
     GGML_UNUSED(backend);
 }
 
+GGML_CALL static bool ggml_backend_sycl_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
+    if (buft->iface.get_name != ggml_backend_sycl_buffer_type_name) {
+        return false;
+    }
+    ggml_backend_sycl_buffer_type_context * buft_ctx = (ggml_backend_sycl_buffer_type_context *)buft->context;
+    ggml_backend_sycl_context * sycl_ctx = (ggml_backend_sycl_context *)backend->context;
+    return buft_ctx->device == sycl_ctx->device;
+}
 
 static ggml_backend_i ggml_backend_sycl_interface = {
     /* .get_name = */ ggml_backend_sycl_name,
@@ -17322,9 +17312,11 @@ static ggml_backend_i ggml_backend_sycl_interface = {
     /* .synchronize = */ ggml_backend_sycl_synchronize,
     /* .graph_plan_create = */ NULL,
     /* .graph_plan_free = */ NULL,
+    /* .graph_plan_update = */ NULL,
     /* .graph_plan_compute = */ NULL,
     /* .graph_compute = */ ggml_backend_sycl_graph_compute,
     /* .supports_op = */ ggml_backend_sycl_supports_op,
+    /* .supports_buft = */ ggml_backend_sycl_supports_buft,
     /* .offload_op = */ ggml_backend_sycl_offload_op,
     /* .event_new = */ NULL,
     /* .event_free = */ NULL,