diff options
Diffstat (limited to 'ggml/src')
-rw-r--r-- | ggml/src/ggml-alloc.c | 2 |
-rw-r--r-- | ggml/src/ggml-backend.c | 32 |
-rw-r--r-- | ggml/src/ggml-cuda.cu | 11 |
-rw-r--r-- | ggml/src/ggml.c | 10 |
4 files changed, 55 insertions, 0 deletions
diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c index e485326a..d811dee6 100644 --- a/ggml/src/ggml-alloc.c +++ b/ggml/src/ggml-alloc.c @@ -174,6 +174,8 @@ static size_t ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * alloc, size_t siz // this should never happen fprintf(stderr, "%s: not enough space in the buffer to allocate %zu bytes, largest block available %zu bytes\n", __func__, size, max_avail); + fprintf(stderr, "%s: tensor was %s with %zu elements and %zu bytes\n", __func__, tensor->name, + ggml_nelements(tensor), ggml_nbytes(tensor)); GGML_ABORT("not enough space in the buffer"); } } diff --git a/ggml/src/ggml-backend.c b/ggml/src/ggml-backend.c index e1651cc6..0458bd0c 100644 --- a/ggml/src/ggml-backend.c +++ b/ggml/src/ggml-backend.c @@ -9,6 +9,7 @@ #include <stdlib.h> #include <string.h> +#define IK_PRINT_TIMING 0 #define MAX(a, b) ((a) > (b) ? (a) : (b)) @@ -229,7 +230,17 @@ GGML_CALL void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * return; } + +#if IK_PRINT_TIMING + int64_t tim1 = ggml_time_us(); +#endif buf->iface.set_tensor(buf, tensor, data, offset, size); +#if IK_PRINT_TIMING + int64_t tim2 = ggml_time_us(); + //printf("%s(%s) %zu %d us\n", __func__, tensor->name, size, (int)(tim2-tim1)); + printf("%s(%s): %d us\n", __func__, tensor->name, (int)(tim2-tim1)); +#endif + } GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) { @@ -243,7 +254,15 @@ GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * return; } +#if IK_PRINT_TIMING + int64_t tim1 = ggml_time_us(); +#endif buf->iface.get_tensor(buf, tensor, data, offset, size); +#if IK_PRINT_TIMING + int64_t tim2 = ggml_time_us(); + //printf("%s(%s) %zu %d us\n", __func__, tensor->name, size, (int)(tim2-tim1)); + printf("%s(%s): %d us\n", __func__, tensor->name, (int)(tim2-tim1)); +#endif } void ggml_backend_synchronize(ggml_backend_t backend) { @@ -1751,7 +1770,11 @@ static bool 
ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) { static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) { struct ggml_backend_sched_split * splits = sched->splits; + for (int i = 0; i < sched->n_splits; i++) { +#if IK_PRINT_TIMING + int64_t tim1 = ggml_time_us(); +#endif struct ggml_backend_sched_split * split = &splits[i]; int split_backend_id = split->backend_id; ggml_backend_t split_backend = sched->backends[split_backend_id]; @@ -1792,6 +1815,10 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s } if (!sched->callback_eval) { +#if IK_PRINT_TIMING + int64_t tim2 = ggml_time_us(); + printf("%s(.1.): %d us\n", __func__, (int)(tim2-tim1)); +#endif enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph); if (ec != GGML_STATUS_SUCCESS) { return ec; @@ -1814,6 +1841,11 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1); +#if IK_PRINT_TIMING + int64_t tim2 = ggml_time_us(); + printf("%s(.2.): %d us\n", __func__, (int)(tim2-tim1)); +#endif + enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &gv); if (ec != GGML_STATUS_SUCCESS) { return ec; diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu index 26d06d56..c305cd89 100644 --- a/ggml/src/ggml-cuda.cu +++ b/ggml/src/ggml-cuda.cu @@ -50,6 +50,8 @@ #include <string> #include <vector> +#define IK_PRINT_TIMING 0 + static_assert(sizeof(half) == sizeof(ggml_fp16_t), "wrong fp16 size"); static void ggml_cuda_default_log_callback(enum ggml_log_level level, const char * msg, void * user_data) { @@ -2446,6 +2448,10 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg ggml_cuda_set_peer_access(dst->src[1]->ne[1], ctx.device); } +#if IK_PRINT_TIMING + int64_t tim1 = ggml_time_us(); +#endif + switch (dst->op) { case GGML_OP_REPEAT: ggml_cuda_op_repeat(ctx, dst); @@ -2618,6 
+2624,11 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg CUDA_CHECK(err); } +#if IK_PRINT_TIMING + int64_t tim2 = ggml_time_us(); + printf("%s(%s): %d us\n", ggml_op_name(dst->op), dst->name, (int)(tim2 - tim1)); +#endif + return true; } diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 8efe2653..80dd25ff 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -4267,6 +4267,9 @@ GGML_CALL int64_t ggml_blck_size(enum ggml_type type) { } GGML_CALL size_t ggml_nbytes(const struct ggml_tensor * tensor) { + for (int i = 0; i < GGML_MAX_DIMS; ++i) { + if (tensor->ne[i] <= 0) return 0; + } size_t nbytes; size_t blck_size = ggml_blck_size(tensor->type); if (blck_size == 1) { @@ -21480,6 +21483,9 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl #ifdef GGML_USE_OPENMP if (n_threads > 1) { +//#if IK_PRINT_TIMING +// int64_t tim1 = ggml_time_us(); +//#endif #pragma omp parallel num_threads(n_threads) { #pragma omp single @@ -21496,6 +21502,10 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl }; ggml_graph_compute_thread(&worker); } +//#if IK_PRINT_TIMING +// int64_t tim2 = ggml_time_us(); +// printf("%s(...): %d us\n", __func__, (int)(tim2-tim1)); +//#endif } else { struct ggml_compute_state worker = { .thrd = 0, |