Diffstat (limited to 'ggml-backend.c')
-rw-r--r--  ggml-backend.c  104
1 file changed, 102 insertions(+), 2 deletions(-)
diff --git a/ggml-backend.c b/ggml-backend.c
index 897a4cb5..8b6cf7c9 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -27,6 +27,14 @@ size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
return buft->iface.get_alignment(buft);
}
+size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) {
+ // get_max_size is optional, defaults to SIZE_MAX
+ if (buft->iface.get_max_size) {
+ return buft->iface.get_max_size(buft);
+ }
+ return SIZE_MAX;
+}
+
GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
// get_alloc_size is optional, defaults to ggml_nbytes
if (buft->iface.get_alloc_size) {
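The new getter follows the same optional-slot pattern as get_alloc_size below: a NULL interface entry means "no per-allocation limit" and the call falls back to SIZE_MAX. A caller-side sketch of how an allocator might chunk a large allocation (buft and total_size are hypothetical; note the rounding form, since the usual (n + max - 1) / max idiom wraps around when max is SIZE_MAX):

// sketch: how many buffers are needed for total_size bytes of this type?
size_t max_size = ggml_backend_buft_get_max_size(buft);
size_t n_chunks = total_size / max_size + (total_size % max_size != 0);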
@@ -57,8 +65,6 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init(
size_t size) {
ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer));
- GGML_ASSERT(iface.get_base != NULL);
-
(*buffer) = (struct ggml_backend_buffer) {
/* .interface = */ iface,
/* .buft = */ buft,
@@ -108,6 +114,10 @@ size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer) {
return ggml_backend_buft_get_alignment(ggml_backend_buffer_get_type(buffer));
}
+size_t ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) {
+ return ggml_backend_buft_get_max_size(ggml_backend_buffer_get_type(buffer));
+}
+
size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) {
return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_get_type(buffer), tensor);
}
@@ -122,6 +132,11 @@ bool ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) {
void ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) {
buffer->usage = usage;
+
+ // FIXME: add a generic callback to the buffer interface
+ if (ggml_backend_buffer_is_multi_buffer(buffer)) {
+ ggml_backend_multi_buffer_set_usage(buffer, usage);
+ }
}
ggml_backend_buffer_type_t ggml_backend_buffer_get_type(ggml_backend_buffer_t buffer) {
@@ -171,6 +186,10 @@ size_t ggml_backend_get_alignment(ggml_backend_t backend) {
return ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend));
}
+size_t ggml_backend_get_max_size(ggml_backend_t backend) {
+ return ggml_backend_buft_get_max_size(ggml_backend_get_default_buffer_type(backend));
+}
+
void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
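ggml_backend_get_max_size is a thin convenience over the backend's default buffer type, pairing with ggml_backend_get_alignment two hunks above. A hedged sketch of how a loader might combine the two when sizing upload chunks (the helper name and policy are hypothetical, not part of this patch):

// sketch: clamp an upload size to the backend's limits
static size_t pick_chunk_size(ggml_backend_t backend, size_t requested) {
    size_t align    = ggml_backend_get_alignment(backend);
    size_t max_size = ggml_backend_get_max_size(backend);
    size_t chunk    = requested < max_size ? requested : max_size;
    chunk -= chunk % align;           // round down to the backend's alignment
    return chunk > 0 ? chunk : align; // never return a zero-sized chunk
}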
@@ -349,6 +368,11 @@ GGML_CALL static void ggml_backend_registry_init(void) {
extern GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void);
ggml_backend_register("Metal", ggml_backend_reg_metal_init, ggml_backend_metal_buffer_type(), NULL);
#endif
+
+#ifdef GGML_USE_VULKAN
+ extern GGML_CALL int ggml_backend_vk_reg_devices(void);
+ ggml_backend_vk_reg_devices();
+#endif
}
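Unlike the other backends here, Vulkan registers through its own entry point because one host may expose several devices, each registering as a separate backend. Once ggml_backend_registry_init has run, the result can be inspected with the public registry getters; a minimal sketch (assuming the ggml_backend_reg_* accessors declared in ggml-backend.h):

// sketch: enumerate everything that registered itself, Vulkan devices included
for (size_t i = 0; i < ggml_backend_reg_get_count(); i++) {
    printf("backend %zu: %s\n", i, ggml_backend_reg_get_name(i));
}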
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
@@ -552,6 +576,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) {
/* .get_name = */ ggml_backend_cpu_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
+ /* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
/* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
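The CPU buffer type leaves the new slot NULL and inherits the SIZE_MAX default. A backend with a genuine per-allocation cap would plug in a function instead; a hypothetical sketch (the 1 GiB figure is illustrative, not from this patch):

// hypothetical: report a 1 GiB per-buffer cap to the allocator
GGML_CALL static size_t my_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) {
    return (size_t)1 << 30;
    GGML_UNUSED(buft);
}
// wired into the vtable as: /* .get_max_size = */ my_buffer_type_get_max_size,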
@@ -607,6 +632,7 @@ ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) {
/* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name,
/* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer,
/* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment,
+ /* .get_max_size = */ NULL, // defaults to SIZE_MAX
/* .get_alloc_size = */ NULL, // defaults to ggml_nbytes
/* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend,
/* .is_host = */ ggml_backend_cpu_buffer_type_is_host,
@@ -763,6 +789,80 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, v
GGML_UNUSED(user_data);
}
+// multi-buffer: a single logical buffer backed by multiple underlying buffers of the same type
+
+struct ggml_backend_multi_buffer_context {
+ ggml_backend_buffer_t * buffers;
+ size_t n_buffers;
+};
+
+typedef struct ggml_backend_multi_buffer_context * ggml_backend_multi_buffer_context_t;
+
+GGML_CALL static const char * ggml_backend_multi_buffer_get_name(ggml_backend_buffer_t buffer) {
+ ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context;
+
+ return ctx->buffers[0]->iface.get_name(ctx->buffers[0]);
+}
+
+GGML_CALL static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer) {
+ ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context;
+ for (size_t i = 0; i < ctx->n_buffers; i++) {
+ ggml_backend_buffer_free(ctx->buffers[i]);
+ }
+
+ free(ctx->buffers);
+ free(ctx);
+}
+
+GGML_CALL static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
+ ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context;
+ for (size_t i = 0; i < ctx->n_buffers; i++) {
+ ggml_backend_buffer_clear(ctx->buffers[i], value);
+ }
+}
+
+static struct ggml_backend_buffer_i ggml_backend_multi_buffer_context_interface(void) {
+ static struct ggml_backend_buffer_i multi_backend_buffer_i = {
+ /* .get_name = */ ggml_backend_multi_buffer_get_name,
+ /* .free_buffer = */ ggml_backend_multi_buffer_free_buffer,
+ /* .get_base = */ NULL,
+ /* .init_tensor = */ NULL,
+ /* .set_tensor = */ NULL,
+ /* .get_tensor = */ NULL,
+ /* .cpy_tensor = */ NULL,
+ /* .clear = */ ggml_backend_multi_buffer_clear,
+ /* .reset = */ NULL,
+ };
+
+ return multi_backend_buffer_i;
+}
+
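+// note: the multi-buffer takes ownership of the child buffers - freeing it frees them too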
+GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers) {
+ ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) malloc(sizeof(struct ggml_backend_multi_buffer_context));
+ ctx->n_buffers = n_buffers;
+ ctx->buffers = (ggml_backend_buffer_t *) malloc(n_buffers * sizeof(ggml_backend_buffer_t));
+
+ size_t total_size = 0;
+ for (size_t i = 0; i < n_buffers; i++) {
+ ctx->buffers[i] = buffers[i];
+ total_size += ggml_backend_buffer_get_size(buffers[i]);
+ }
+
+ return ggml_backend_buffer_init(buffers[0]->buft, ggml_backend_multi_buffer_context_interface(), ctx, total_size);
+}
+
+GGML_CALL bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer) {
+ return buffer->iface.get_name == ggml_backend_multi_buffer_get_name;
+}
+
+GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) {
+ GGML_ASSERT(ggml_backend_buffer_is_multi_buffer(buffer));
+ ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context;
+ for (size_t i = 0; i < ctx->n_buffers; i++) {
+ ggml_backend_buffer_set_usage(ctx->buffers[i], usage);
+ }
+}
+
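Taken together, a multi-buffer presents several allocations as one logical buffer: get_name comes from the first part, the size is the sum of all parts, and free, clear, and set_usage fan out to every child. A usage sketch (CPU buffers and sizes are illustrative; error handling omitted):

// sketch: group two allocations into one logical weights buffer
ggml_backend_buffer_type_t buft = ggml_backend_cpu_buffer_type();
ggml_backend_buffer_t parts[2] = {
    ggml_backend_buft_alloc_buffer(buft, 16u*1024*1024),
    ggml_backend_buft_alloc_buffer(buft, 16u*1024*1024),
};
ggml_backend_buffer_t multi = ggml_backend_multi_buffer_alloc_buffer(parts, 2);
GGML_ASSERT(ggml_backend_buffer_get_size(multi) == 32u*1024*1024);        // sum of the parts
ggml_backend_buffer_set_usage(multi, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);  // forwarded to both children
ggml_backend_buffer_free(multi);                                          // frees both children as well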
// scheduler