summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Michael Podvitskiy <podvitskiymichael@gmail.com> 2024-02-09 10:42:27 +0100
committer Georgi Gerganov <ggerganov@gmail.com> 2024-02-10 09:29:21 +0200
commit 4633d93af08d890ecd00fa6e4f61d76f21cded4c (patch)
tree 3c1bc3f11afdca41b89959cd20b52dc29a157bcc
parent 4b7b38bef5addbd31f453871d79647fbae6bec8a (diff)
ggml : add abort_callback for cpu backend (ggml/725)
* a way to use abort_callback with the cpu backend
* whisper update
-rw-r--r-- ggml-backend.c 26
-rw-r--r-- ggml-backend.h 5
-rw-r--r-- ggml.c 2
-rw-r--r-- ggml.h 9
4 files changed, 33 insertions, 9 deletions
diff --git a/ggml-backend.c b/ggml-backend.c
index 0764dfeb..532da8ed 100644
--- a/ggml-backend.c
+++ b/ggml-backend.c
@@ -653,6 +653,9 @@ struct ggml_backend_cpu_context {
int n_threads;
void * work_data;
size_t work_size;
+
+ ggml_abort_callback abort_callback;
+ void * abort_callback_data;
};
GGML_CALL static const char * ggml_backend_cpu_name(ggml_backend_t backend) {
@@ -691,6 +694,9 @@ GGML_CALL static ggml_backend_graph_plan_t ggml_backend_cpu_graph_plan_create(gg
cpu_plan->cplan.work_data = malloc(cpu_plan->cplan.work_size);
}
+ cpu_plan->cplan.abort_callback = cpu_ctx->abort_callback;
+ cpu_plan->cplan.abort_callback_data = cpu_ctx->abort_callback_data;
+
return cpu_plan;
}
@@ -721,9 +727,11 @@ GGML_CALL static bool ggml_backend_cpu_graph_compute(ggml_backend_t backend, str
cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size);
cpu_ctx->work_size = cplan.work_size;
}
-
cplan.work_data = cpu_ctx->work_data;
+ cplan.abort_callback = cpu_ctx->abort_callback;
+ cplan.abort_callback_data = cpu_ctx->abort_callback_data;
+
ggml_graph_compute(cgraph, &cplan);
return true;
}
@@ -759,9 +767,11 @@ static struct ggml_backend_i cpu_backend_i = {
ggml_backend_t ggml_backend_cpu_init(void) {
struct ggml_backend_cpu_context * ctx = malloc(sizeof(struct ggml_backend_cpu_context));
- ctx->n_threads = GGML_DEFAULT_N_THREADS;
- ctx->work_data = NULL;
- ctx->work_size = 0;
+ ctx->n_threads = GGML_DEFAULT_N_THREADS;
+ ctx->work_data = NULL;
+ ctx->work_size = 0;
+ ctx->abort_callback = NULL;
+ ctx->abort_callback_data = NULL;
ggml_backend_t cpu_backend = malloc(sizeof(struct ggml_backend));
@@ -783,6 +793,14 @@ void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads) {
ctx->n_threads = n_threads;
}
+void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data) {
+ GGML_ASSERT(ggml_backend_is_cpu(backend_cpu));
+
+ struct ggml_backend_cpu_context * ctx = (struct ggml_backend_cpu_context *)backend_cpu->context;
+ ctx->abort_callback = abort_callback;
+ ctx->abort_callback_data = abort_callback_data;
+}
+
GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size) {
return ggml_backend_buffer_init(ggml_backend_cpu_buffer_type(), cpu_backend_buffer_i_from_ptr, ptr, size);
}
diff --git a/ggml-backend.h b/ggml-backend.h
index 8b8160fc..282b3a9b 100644
--- a/ggml-backend.h
+++ b/ggml-backend.h
@@ -83,8 +83,9 @@ extern "C" {
GGML_API ggml_backend_t ggml_backend_cpu_init(void);
- GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
- GGML_API void ggml_backend_cpu_set_n_threads(ggml_backend_t backend_cpu, int n_threads);
+ GGML_API GGML_CALL bool ggml_backend_is_cpu (ggml_backend_t backend);
+ GGML_API void ggml_backend_cpu_set_n_threads (ggml_backend_t backend_cpu, int n_threads);
+ GGML_API void ggml_backend_cpu_set_abort_callback(ggml_backend_t backend_cpu, ggml_abort_callback abort_callback, void * abort_callback_data);
// Create a backend buffer from an existing pointer
GGML_API GGML_CALL ggml_backend_buffer_t ggml_backend_cpu_buffer_from_ptr(void * ptr, size_t size);
diff --git a/ggml.c b/ggml.c
index f783a6fd..86cd6586 100644
--- a/ggml.c
+++ b/ggml.c
@@ -16649,7 +16649,7 @@ struct ggml_compute_state_shared {
atomic_int node_n; // active graph node
atomic_int node_task; // active graph node task phase
- bool (*abort_callback)(void * data); // abort ggml_graph_compute when true
+ ggml_abort_callback abort_callback; // abort ggml_graph_compute when true
void * abort_callback_data;
};
diff --git a/ggml.h b/ggml.h
index e0a4799f..1360cd8e 100644
--- a/ggml.h
+++ b/ggml.h
@@ -567,6 +567,11 @@ extern "C" {
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
+ // Abort callback
+ // If not NULL, called before ggml computation
+ // If it returns true, the computation is aborted
+ typedef bool (*ggml_abort_callback)(void * data);
+
// the compute plan that needs to be prepared for ggml_graph_compute()
// since https://github.com/ggerganov/ggml/issues/287
struct ggml_cplan {
@@ -576,8 +581,8 @@ extern "C" {
int n_threads;
// abort ggml_graph_compute when true
- bool (*abort_callback)(void * data);
- void * abort_callback_data;
+ ggml_abort_callback abort_callback;
+ void * abort_callback_data;
};
enum ggml_cgraph_eval_order {