diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-12-07 22:26:54 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-12-07 22:26:54 +0200 |
commit | fe680e3d1080a765e5d3150ffd7bab189742898d (patch) | |
tree | cd8be8bf5722d10596923aef7fb44bf8a58378d7 /ggml-backend-impl.h | |
parent | bcc0eb4591bec5ec02fad3f2bdcb1b265052ea56 (diff) |
sync : ggml (new ops, tests, backend, etc.) (#4359)
* sync : ggml (part 1)
* sync : ggml (part 2, CUDA)
* sync : ggml (part 3, Metal)
* ggml : build fixes
ggml-ci
* cuda : restore lost changes
* cuda : restore lost changes (StableLM rope)
* cmake : enable separable compilation for CUDA
ggml-ci
* ggml-cuda : remove device side dequantize
* Revert "cmake : enable separable compilation for CUDA"
This reverts commit 09e35d04b1c4ca67f9685690160b35bc885a89ac.
* cuda : remove assert for rope
* tests : add test-backend-ops
* ggml : fix bug in ggml_concat
* ggml : restore `ggml_get_n_tasks()` logic in `ggml_graph_plan()`
* ci : try to fix macOS
* ggml-backend : remove backend self-registration
* ci : disable Metal for macOS cmake build
ggml-ci
* metal : fix "supports family" call
* metal : fix assert
* metal : print resource path
ggml-ci
---------
Co-authored-by: slaren <slarengh@gmail.com>
Diffstat (limited to 'ggml-backend-impl.h')
-rw-r--r-- | ggml-backend-impl.h | 67 |
1 files changed, 46 insertions, 21 deletions
diff --git a/ggml-backend-impl.h b/ggml-backend-impl.h index 211e3d42..f588af60 100644 --- a/ggml-backend-impl.h +++ b/ggml-backend-impl.h @@ -12,31 +12,50 @@ extern "C" { // Backend buffer // + // buffer type + typedef void * ggml_backend_buffer_type_context_t; + + struct ggml_backend_buffer_type_i { + ggml_backend_buffer_t (*alloc_buffer) (ggml_backend_buffer_type_t buft, size_t size); + size_t (*get_alignment) (ggml_backend_buffer_type_t buft); // tensor alignment + size_t (*get_alloc_size) (ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor); // data size needed to allocate the tensor, including padding + bool (*supports_backend)(ggml_backend_buffer_type_t buft, ggml_backend_t backend); // check if the buffer type is usable by the backend + }; + + struct ggml_backend_buffer_type { + struct ggml_backend_buffer_type_i iface; + ggml_backend_buffer_type_context_t context; + }; + + // buffer typedef void * ggml_backend_buffer_context_t; struct ggml_backend_buffer_i { - void (*free_buffer) (ggml_backend_buffer_t buffer); - void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer - size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback - void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback - void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback + void (*free_buffer)(ggml_backend_buffer_t buffer); + //void (*reset) (ggml_backend_buffer_t buffer); // reset any internal state due to tensor initialization, such as tensor extras + void * (*get_base) (ggml_backend_buffer_t buffer); + void (*init_tensor)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); + void (*set_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + void (*get_tensor) (ggml_backend_buffer_t buffer, const struct ggml_tensor * tensor, void * data, 
size_t offset, size_t size); + // (optional) copy tensor between different buffer-type, allow for single-copy transfers + void (*cpy_tensor_from)(ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst); + void (*cpy_tensor_to) (ggml_backend_buffer_t buffer, struct ggml_tensor * src, struct ggml_tensor * dst); }; struct ggml_backend_buffer { - struct ggml_backend_buffer_i iface; - - ggml_backend_t backend; + struct ggml_backend_buffer_i iface; + ggml_backend_buffer_type_t buft; ggml_backend_buffer_context_t context; - size_t size; }; - GGML_API ggml_backend_buffer_t ggml_backend_buffer_init( - struct ggml_backend * backend, + ggml_backend_buffer_t ggml_backend_buffer_init( + ggml_backend_buffer_type_t buft, struct ggml_backend_buffer_i iface, ggml_backend_buffer_context_t context, size_t size); + // // Backend // @@ -49,20 +68,17 @@ extern "C" { void (*free)(ggml_backend_t backend); // buffer allocation - ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size); + ggml_backend_buffer_type_t (*get_default_buffer_type)(ggml_backend_t backend); - // get buffer alignment - size_t (*get_alignment)(ggml_backend_t backend); - - // tensor data access - // these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize + // (optional) asynchronous tensor data access void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); - void (*synchronize) (ggml_backend_t backend); - // (optional) copy tensor between different backends, allow for single-copy tranfers - void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); - void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); + // (optional) 
asynchronous tensor copy + void (*cpy_tensor_from_async)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); + void (*cpy_tensor_to_async) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); + + void (*synchronize) (ggml_backend_t backend); // compute graph with a plan ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph); @@ -82,6 +98,15 @@ extern "C" { ggml_backend_context_t context; }; + + // + // Backend registry + // + + typedef ggml_backend_t (*ggml_backend_init_fn)(const char * params, void * user_data); + + void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data); + #ifdef __cplusplus } #endif |