Diffstat (limited to 'ggml.c')
-rw-r--r-- | ggml.c | 29
1 files changed, 23 insertions, 6 deletions
@@ -320,6 +320,16 @@ static ggml_fp16_t ggml_table_exp_f16[1 << 16];
 // precomputed f32 table for f16 (256 KB) (ggml-impl.h)
 float ggml_table_f32_f16[1 << 16];
 
+const char * ggml_status_to_string(enum ggml_status status) {
+    switch (status) {
+        case GGML_STATUS_ALLOC_FAILED: return "GGML status: error (failed to allocate memory)";
+        case GGML_STATUS_FAILED:       return "GGML status: error (operation failed)";
+        case GGML_STATUS_SUCCESS:      return "GGML status: success";
+        case GGML_STATUS_ABORTED:      return "GGML status: warning (operation aborted)";
+        default:                       GGML_ASSERT(false);
+    }
+}
+
 // note: do not use these inside ggml.c
 // these are meant to be used via the ggml.h API
 float ggml_fp16_to_fp32(ggml_fp16_t x) {
@@ -17400,6 +17410,7 @@ struct ggml_compute_state {
     ggml_thread_t thrd;
     int ith;
     struct ggml_compute_state_shared * shared;
+    enum ggml_status ec;
 };
 
 static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const struct ggml_compute_state_shared * st) {
@@ -17693,7 +17704,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
     while (true) {
         if (cplan->abort_callback && cplan->abort_callback(cplan->abort_callback_data)) {
             state->shared->node_n += 1;
-            return (thread_ret_t) GGML_EXIT_ABORTED;
+            state->ec = GGML_STATUS_ABORTED;
+            return 0;
         }
 
         if (atomic_fetch_sub(&state->shared->n_active, 1) == 1) {
@@ -17815,7 +17827,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
         }
     }
 
-    return GGML_EXIT_SUCCESS;
+    return 0;
 }
 
 struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threads) {
@@ -18011,7 +18023,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
     return cplan;
 }
 
-int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
+enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     {
         GGML_ASSERT(cplan);
         GGML_ASSERT(cplan->n_threads > 0);
@@ -18055,6 +18067,7 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
                 .thrd   = 0,
                 .ith    = j,
                 .shared = &state_shared,
+                .ec     = GGML_STATUS_SUCCESS,
             };
 
             const int rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_thread, &workers[j]);
@@ -18065,12 +18078,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
 
     workers[0].ith = 0;
     workers[0].shared = &state_shared;
+    workers[0].ec = GGML_STATUS_SUCCESS;
 
     const int64_t perf_start_cycles  = ggml_perf_cycles();
     const int64_t perf_start_time_us = ggml_perf_time_us();
 
     // this is a work thread too
-    int compute_status = (size_t) ggml_graph_compute_thread(&workers[0]);
+    ggml_graph_compute_thread(&workers[0]);
+    enum ggml_status compute_status = workers[0].ec;
 
     // don't leave affinity set on the main thread
     clear_numa_thread_affinity();
@@ -18080,6 +18095,8 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
        for (int j = 1; j < n_threads; j++) {
            const int rc = ggml_thread_join(workers[j].thrd, NULL);
            GGML_ASSERT(rc == 0);
+           if (workers[j].ec != GGML_STATUS_SUCCESS)
+               compute_status = workers[j].ec;
        }
    }
 
@@ -18107,14 +18124,14 @@ int ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
     return compute_status;
 }
 
-void ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
+enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads);
 
     struct ggml_object * obj = ggml_new_object(ctx, GGML_OBJECT_TYPE_WORK_BUFFER, cplan.work_size);
 
     cplan.work_data = (uint8_t *)ctx->mem_buffer + obj->offs;
 
-    ggml_graph_compute(cgraph, &cplan);
+    return ggml_graph_compute(cgraph, &cplan);
 }
 
 struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
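
For reference, the net effect of this change is that ggml_graph_compute() and ggml_graph_compute_with_ctx() now report an enum ggml_status instead of an int exit code / void, and ggml_status_to_string() turns that status into a readable message. Below is a minimal, hypothetical caller sketch (not part of this patch) showing how the new return value might be checked; it assumes the public ggml.h graph API (ggml_init, ggml_new_graph, ggml_build_forward_expand, ggml_graph_compute_with_ctx):

#include <stdio.h>
#include "ggml.h"

int main(void) {
    // small context that also holds the graph work buffer (size is an arbitrary guess)
    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // trivial graph: c = a + b
    struct ggml_tensor * a = ggml_set_f32(ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8), 1.0f);
    struct ggml_tensor * b = ggml_set_f32(ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8), 2.0f);
    struct ggml_tensor * c = ggml_add(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    // before this patch the call returned void; now the status can be propagated
    enum ggml_status st = ggml_graph_compute_with_ctx(ctx, gf, /*n_threads=*/4);
    if (st != GGML_STATUS_SUCCESS) {
        fprintf(stderr, "graph compute failed: %s\n", ggml_status_to_string(st));
        ggml_free(ctx);
        return 1;
    }

    printf("c[0] = %f\n", ggml_get_f32_1d(c, 0)); // expect 3.0
    ggml_free(ctx);
    return 0;
}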