diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-11-13 14:16:23 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-13 14:16:23 +0200 |
commit | 4760e7cc0b68570d58f55e8dda469805d1759d0d (patch) | |
tree | cd983b1f2833f0094c0539f7943703c6787bf12b /ggml-backend-impl.h | |
parent | bb50a792ec2a49944470c82694fa364345e95170 (diff) |
sync : ggml (backend v2) (#3912)
* sync : ggml (backend v2) (wip)
* sync : migrate examples and llama.cpp to dynamic graphs (wip)
* sync : update tests + fix max op params to 64
ggml-ci
* sync : ggml-cuda
ggml-ci
* llama : fix save/load state context size
ggml-ci
* sync : try to fix build on tvOS
* sync : pass custom graph sizes in training examples
* sync : update graph copies to new ggml API
* sync : update sync-ggml.sh with new files
* scripts : fix header in sync script
* train : fix context size calculations
* llama : increase inference graph size up to 4096 nodes
* train : allocate grads for backward graphs
* train : allocate grads for gb_tmp
Diffstat (limited to 'ggml-backend-impl.h')
-rw-r--r-- | ggml-backend-impl.h | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/ggml-backend-impl.h b/ggml-backend-impl.h new file mode 100644 index 00000000..211e3d42 --- /dev/null +++ b/ggml-backend-impl.h @@ -0,0 +1,87 @@ +#pragma once + +// ggml-backend internal header + +#include "ggml-backend.h" + +#ifdef __cplusplus +extern "C" { +#endif + + // + // Backend buffer + // + + typedef void * ggml_backend_buffer_context_t; + + struct ggml_backend_buffer_i { + void (*free_buffer) (ggml_backend_buffer_t buffer); + void * (*get_base) (ggml_backend_buffer_t buffer); // get base pointer + size_t (*get_alloc_size)(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-allocation callback + void (*init_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // post-allocation callback + void (*free_tensor) (ggml_backend_buffer_t buffer, struct ggml_tensor * tensor); // pre-free callback + }; + + struct ggml_backend_buffer { + struct ggml_backend_buffer_i iface; + + ggml_backend_t backend; + ggml_backend_buffer_context_t context; + + size_t size; + }; + + GGML_API ggml_backend_buffer_t ggml_backend_buffer_init( + struct ggml_backend * backend, + struct ggml_backend_buffer_i iface, + ggml_backend_buffer_context_t context, + size_t size); + + // + // Backend + // + + typedef void * ggml_backend_context_t; + + struct ggml_backend_i { + const char * (*get_name)(ggml_backend_t backend); + + void (*free)(ggml_backend_t backend); + + // buffer allocation + ggml_backend_buffer_t (*alloc_buffer)(ggml_backend_t backend, size_t size); + + // get buffer alignment + size_t (*get_alignment)(ggml_backend_t backend); + + // tensor data access + // these functions can be asynchronous, helper functions are provided for synchronous access that automatically call synchronize + void (*set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size); + void (*get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size); + void (*synchronize) (ggml_backend_t backend); + + // (optional) copy tensor between different backends, allow for single-copy tranfers + void (*cpy_tensor_from)(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); + void (*cpy_tensor_to) (ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); + + // compute graph with a plan + ggml_backend_graph_plan_t (*graph_plan_create) (ggml_backend_t backend, struct ggml_cgraph * cgraph); + void (*graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan); + void (*graph_plan_compute)(ggml_backend_t backend, ggml_backend_graph_plan_t plan); + + // compute graph without a plan + void (*graph_compute)(ggml_backend_t backend, struct ggml_cgraph * cgraph); + + // check if the backend supports an operation + bool (*supports_op)(ggml_backend_t backend, const struct ggml_tensor * op); + }; + + struct ggml_backend { + struct ggml_backend_i iface; + + ggml_backend_context_t context; + }; + +#ifdef __cplusplus +} +#endif |