summaryrefslogtreecommitdiff
path: root/ggml-kompute.cpp
diff options
context:
space:
mode:
author: slaren <slarengh@gmail.com> 2024-06-13 03:11:35 +0200
committer: GitHub <noreply@github.com> 2024-06-13 03:11:35 +0200
commitf578b86b2123d0f92afbaa98a031df4d4464e582 (patch)
tree2a21feec089e5fcaa6f9d34be5468a17c3a5ddc7 /ggml-kompute.cpp
parent1c641e6aac5c18b964e7b32d9dbbb4bf5301d0d7 (diff)
move BLAS to a separate backend (#6210)
* move BLAS to a separate backend
* rename GGML_USE_OPENBLAS to GGML_USE_BLAS
* alloc : reuse same buffer when the same buffer type is used multiple times
* set number of threads automatically for openblas and blis
* sched : print assignments when GGML_SCHED_DEBUG env variable is set
* sched : allow ops with weights on an incompatible buffer type

This will cause the weight to be copied to a backend that supports the op, which is very costly. The weight should have been stored in a buffer of a backend that can run the op, but llama.cpp cannot do this automatically at the moment.

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml-kompute.cpp')
-rw-r--r-- ggml-kompute.cpp | 13
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/ggml-kompute.cpp b/ggml-kompute.cpp
index 18c6f4a1..ed5f2e34 100644
--- a/ggml-kompute.cpp
+++ b/ggml-kompute.cpp
@@ -1902,18 +1902,12 @@ static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_
return ctx->max_alloc;
}
-static bool ggml_backend_kompute_buffer_type_supports_backend(ggml_backend_buffer_type_t buft, ggml_backend_t backend) {
- GGML_UNUSED(buft);
- return ggml_backend_is_kompute(backend);
-}
-
static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = {
/* .get_name = */ ggml_backend_kompute_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_kompute_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_kompute_buffer_type_get_alignment,
/* .get_max_size = */ ggml_backend_vk_buffer_type_get_max_size,
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
- /* .supports_backend = */ ggml_backend_kompute_buffer_type_supports_backend,
/* .is_host = */ NULL,
};
@@ -1973,6 +1967,11 @@ static bool ggml_backend_kompute_supports_op(ggml_backend_t backend, const struc
return ggml_vk_supports_op(op);
}
+static bool ggml_backend_kompute_supports_buft(ggml_backend_t backend, ggml_backend_buffer_type_t buft) {
+ GGML_UNUSED(backend);
+ return buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name;
+}
+
static struct ggml_backend_i kompute_backend_i = {
/* .get_name = */ ggml_backend_kompute_name,
/* .free = */ ggml_backend_kompute_free,
@@ -1983,9 +1982,11 @@ static struct ggml_backend_i kompute_backend_i = {
/* .synchronize = */ NULL,
/* .graph_plan_create = */ NULL,
/* .graph_plan_free = */ NULL,
+ /* .graph_plan_update = */ NULL,
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_kompute_graph_compute,
/* .supports_op = */ ggml_backend_kompute_supports_op,
+ /* .supports_buft = */ ggml_backend_kompute_supports_buft,
/* .offload_op = */ NULL,
/* .event_new = */ NULL,
/* .event_free = */ NULL,