diff options
Diffstat (limited to 'ggml-backend.c')
-rw-r--r-- | ggml-backend.c | 104 |
1 files changed, 102 insertions, 2 deletions
diff --git a/ggml-backend.c b/ggml-backend.c index 897a4cb5..8b6cf7c9 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -27,6 +27,14 @@ size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) { return buft->iface.get_alignment(buft); } +size_t ggml_backend_buft_get_max_size(ggml_backend_buffer_type_t buft) { + // get_max_size is optional, defaults to SIZE_MAX + if (buft->iface.get_max_size) { + return buft->iface.get_max_size(buft); + } + return SIZE_MAX; +} + GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) { // get_alloc_size is optional, defaults to ggml_nbytes if (buft->iface.get_alloc_size) { @@ -57,8 +65,6 @@ GGML_CALL ggml_backend_buffer_t ggml_backend_buffer_init( size_t size) { ggml_backend_buffer_t buffer = malloc(sizeof(struct ggml_backend_buffer)); - GGML_ASSERT(iface.get_base != NULL); - (*buffer) = (struct ggml_backend_buffer) { /* .interface = */ iface, /* .buft = */ buft, @@ -108,6 +114,10 @@ size_t ggml_backend_buffer_get_alignment (ggml_backend_buffer_t buffer) { return ggml_backend_buft_get_alignment(ggml_backend_buffer_get_type(buffer)); } +size_t ggml_backend_buffer_get_max_size(ggml_backend_buffer_t buffer) { + return ggml_backend_buft_get_max_size(ggml_backend_buffer_get_type(buffer)); +} + size_t ggml_backend_buffer_get_alloc_size(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { return ggml_backend_buft_get_alloc_size(ggml_backend_buffer_get_type(buffer), tensor); } @@ -122,6 +132,11 @@ bool ggml_backend_buffer_is_host(ggml_backend_buffer_t buffer) { void ggml_backend_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) { buffer->usage = usage; + + // FIXME: add a generic callback to the buffer interface + if (ggml_backend_buffer_is_multi_buffer(buffer)) { + ggml_backend_multi_buffer_set_usage(buffer, usage); + } } ggml_backend_buffer_type_t ggml_backend_buffer_get_type(ggml_backend_buffer_t buffer) { @@ -171,6 +186,10 @@ size_t ggml_backend_get_alignment(ggml_backend_t backend) { return ggml_backend_buft_get_alignment(ggml_backend_get_default_buffer_type(backend)); } +size_t ggml_backend_get_max_size(ggml_backend_t backend) { + return ggml_backend_buft_get_max_size(ggml_backend_get_default_buffer_type(backend)); +} + void ggml_backend_tensor_set_async(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) { GGML_ASSERT(tensor->data != NULL && "tensor not allocated"); GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds"); @@ -349,6 +368,11 @@ GGML_CALL static void ggml_backend_registry_init(void) { extern GGML_CALL ggml_backend_buffer_type_t ggml_backend_metal_buffer_type(void); ggml_backend_register("Metal", ggml_backend_reg_metal_init, ggml_backend_metal_buffer_type(), NULL); #endif + +#ifdef GGML_USE_VULKAN + extern GGML_CALL int ggml_backend_vk_reg_devices(void); + ggml_backend_vk_reg_devices(); +#endif } GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) { @@ -552,6 +576,7 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cpu_buffer_type(void) { /* .get_name = */ ggml_backend_cpu_buffer_type_get_name, /* .alloc_buffer = */ ggml_backend_cpu_buffer_type_alloc_buffer, /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes /* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend, /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, @@ -607,6 +632,7 @@ ggml_backend_buffer_type_t ggml_backend_cpu_hbm_buffer_type(void) { /* .get_name = */ ggml_backend_cpu_hbm_buffer_type_get_name, /* .alloc_buffer = */ ggml_backend_cpu_hbm_buffer_type_alloc_buffer, /* .get_alignment = */ ggml_backend_cpu_buffer_type_get_alignment, + /* .get_max_size = */ NULL, // defaults to SIZE_MAX /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes /* .supports_backend = */ ggml_backend_cpu_buffer_type_supports_backend, /* .is_host = */ ggml_backend_cpu_buffer_type_is_host, @@ -763,6 +789,80 @@ GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, v GGML_UNUSED(user_data); } +// multi-buffer buffer + +struct ggml_backend_multi_buffer_context { + ggml_backend_buffer_t * buffers; + size_t n_buffers; +}; + +typedef struct ggml_backend_multi_buffer_context * ggml_backend_multi_buffer_context_t; + +GGML_CALL static const char * ggml_backend_multi_buffer_get_name(ggml_backend_buffer_t buffer) { + ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context; + + return ctx->buffers[0]->iface.get_name(ctx->buffers[0]); +} + +GGML_CALL static void ggml_backend_multi_buffer_free_buffer(ggml_backend_buffer_t buffer) { + ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context; + for (size_t i = 0; i < ctx->n_buffers; i++) { + ggml_backend_buffer_free(ctx->buffers[i]); + } + + free(ctx->buffers); + free(ctx); +} + +GGML_CALL static void ggml_backend_multi_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { + ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context; + for (size_t i = 0; i < ctx->n_buffers; i++) { + ggml_backend_buffer_clear(ctx->buffers[i], value); + } +} + +static struct ggml_backend_buffer_i ggml_backend_multi_buffer_context_interface(void) { + static struct ggml_backend_buffer_i multi_backend_buffer_i = { + /* .get_name = */ ggml_backend_multi_buffer_get_name, + /* .free_buffer = */ ggml_backend_multi_buffer_free_buffer, + /* .get_base = */ NULL, + /* .init_tensor = */ NULL, + /* .set_tensor = */ NULL, + /* .get_tensor = */ NULL, + /* .cpy_tensor = */ NULL, + /* .clear = */ ggml_backend_multi_buffer_clear, + /* .reset = */ NULL, + }; + + return multi_backend_buffer_i; +} + +GGML_CALL ggml_backend_buffer_t ggml_backend_multi_buffer_alloc_buffer(ggml_backend_buffer_t * buffers, size_t n_buffers) { + ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) malloc(sizeof(struct ggml_backend_multi_buffer_context)); + ctx->n_buffers = n_buffers; + ctx->buffers = (ggml_backend_buffer_t *) malloc(n_buffers * sizeof(ggml_backend_buffer_t)); + + size_t total_size = 0; + for (size_t i = 0; i < n_buffers; i++) { + ctx->buffers[i] = buffers[i]; + total_size += ggml_backend_buffer_get_size(buffers[i]); + } + + return ggml_backend_buffer_init(buffers[0]->buft, ggml_backend_multi_buffer_context_interface(), ctx, total_size); +} + +GGML_CALL bool ggml_backend_buffer_is_multi_buffer(ggml_backend_buffer_t buffer) { + return buffer->iface.get_name == ggml_backend_multi_buffer_get_name; +} + +GGML_CALL void ggml_backend_multi_buffer_set_usage(ggml_backend_buffer_t buffer, enum ggml_backend_buffer_usage usage) { + GGML_ASSERT(ggml_backend_buffer_is_multi_buffer(buffer)); + ggml_backend_multi_buffer_context_t ctx = (ggml_backend_multi_buffer_context_t) buffer->context; + for (size_t i = 0; i < ctx->n_buffers; i++) { + ggml_backend_buffer_set_usage(ctx->buffers[i], usage); + } +} + // scheduler |