author    | Georgi Gerganov <ggerganov@gmail.com> | 2024-01-13 20:45:45 +0200
committer | GitHub <noreply@github.com>           | 2024-01-13 20:45:45 +0200
commit    | 4be5ef556de830c5c4f6e45c05ef4427823fe607 (patch)
tree      | b40b4e915f478028eca9c6b88bb44bd44784fc91 /ggml-metal.h
parent    | 0ea069b87bd296c556824e57455433b6c0357340 (diff)
metal : remove old API (#4919)
ggml-ci
Diffstat (limited to 'ggml-metal.h')
-rw-r--r-- | ggml-metal.h | 55
1 file changed, 2 insertions(+), 53 deletions(-)
```diff
diff --git a/ggml-metal.h b/ggml-metal.h
index c4b7325d..cd5e2995 100644
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -37,63 +37,12 @@ extern "C" {
 #endif

 //
-// internal API
-// temporary exposed to user-code
-//
-
-struct ggml_metal_context;
-
-void ggml_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
-
-// number of command buffers to use
-struct ggml_metal_context * ggml_metal_init(int n_cb);
-void ggml_metal_free(struct ggml_metal_context * ctx);
-
-void * ggml_metal_host_malloc(size_t n);
-void ggml_metal_host_free (void * data);
-
-// set the number of command buffers to use
-void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb);
-
-// creates a mapping between a host memory buffer and a device memory buffer
-// - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
-// - the mapping is used during computation to determine the arguments of the compute kernels
-// - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
-// - max_size specifies the maximum size of a tensor and is used to create shared views such
-//   that it is guaranteed that the tensor will fit in at least one of the views
-//
-bool ggml_metal_add_buffer(
-        struct ggml_metal_context * ctx,
-                       const char * name,
-                             void * data,
-                             size_t size,
-                             size_t max_size);
-
-// set data from host memory into the device
-void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
-
-// get data from the device into host memory
-void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
-
-// try to find operations that can be run concurrently in the graph
-// you should run it again if the topology of your graph changes
-void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem);
-
-// if the graph has been optimized for concurrently dispatch, return length of the concur_list if optimized
-int ggml_metal_if_optimized(struct ggml_metal_context * ctx);
-
-// output the concur_list for ggml_alloc
-int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
-
-// same as ggml_graph_compute but uses Metal
-// creates gf->n_threads command buffers in parallel
-bool ggml_metal_graph_compute(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
-
-//
 // backend API
 // user-code should use only these functions
 //
+GGML_API void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data);
+
 GGML_API ggml_backend_t ggml_backend_metal_init(void);

 GGML_API bool ggml_backend_is_metal(ggml_backend_t backend);
```
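For reference, a minimal sketch of how user code might migrate from the removed internal API to the backend API this patch keeps. It assumes the `ggml-backend` interface from the same llama.cpp revision; `my_log_cb` and the surrounding control flow are illustrative, not part of the patch.

```c
// Migration sketch: internal ggml_metal_* API -> generic backend API.
// Assumes ggml.h, ggml-backend.h and ggml-metal.h from the same revision.
#include <stdio.h>

#include "ggml.h"
#include "ggml-backend.h"
#include "ggml-metal.h"

// matches the ggml_log_callback typedef from ggml.h
static void my_log_cb(enum ggml_log_level level, const char * text, void * user_data) {
    (void) level;
    (void) user_data;
    fputs(text, stderr);
}

int main(void) {
    // was: ggml_metal_log_set_callback(my_log_cb, NULL);
    ggml_backend_metal_log_set_callback(my_log_cb, NULL);

    // was: struct ggml_metal_context * ctx = ggml_metal_init(n_cb);
    ggml_backend_t backend = ggml_backend_metal_init();
    if (backend == NULL || !ggml_backend_is_metal(backend)) {
        fprintf(stderr, "failed to initialize the Metal backend\n");
        return 1;
    }

    // buffer management and graph evaluation now go through the generic
    // ggml-backend interface (e.g. ggml_backend_graph_compute) rather than
    // ggml_metal_add_buffer / ggml_metal_graph_compute

    // was: ggml_metal_free(ctx);
    ggml_backend_free(backend);
    return 0;
}
```

This matches the intent of the comment retained in the header: user code should depend only on the generic backend entry points, so `struct ggml_metal_context` no longer needs to be exposed.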