author    Kawrakow <iwankawrakow@gmail.com>          2025-02-25 17:55:58 +0200
committer GitHub <noreply@github.com>                2025-02-25 17:55:58 +0200
commit    94b659a2f106e017e5eeb6f492dc9f290e136833 (patch)
tree      8b744ff19b476f7d08e9691def83ad3fbb27c763 /ggml/src/ggml.c
parent    547eee81d99a2676975a9768166b7d164473b8fa (diff)
Give the user the option to override where model weights are stored (#232)
* Give the user the option to override where model weights are stored
* Fix ggml_nbytes() problem and cleanup
For a tensor with zero elements, ggml_nbytes() was returning
uint64_t::max, which caused graph allocation to fail (see the sketch
after the commit message).
* Add timing info to CUDA graph evaluation
* Add more timing info
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
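
As background on the ggml_nbytes() fix above: ggml computes a tensor's byte size from its dimensions (int64_t ne[]) and byte strides (size_t nb[]). When a dimension has zero elements, the ne[i] - 1 term goes negative, and the usual C arithmetic conversions turn it into a huge unsigned value once it is multiplied by a stride. Below is a minimal standalone sketch of that wraparound on a typical 64-bit platform; the stride value is made up for illustration, and this is not the real ggml_nbytes() code.

#include <stdint.h>
#include <stdio.h>

int main(void) {
    int64_t ne0 = 0;  /* a dimension with zero elements */
    size_t  nb0 = 4;  /* an illustrative byte stride */

    /* Pre-patch style accumulation: (ne0 - 1) is -1; multiplying by the
     * unsigned stride converts it to SIZE_MAX, so the byte count wraps
     * around to a value near uint64_t::max. */
    size_t nbytes = 0;
    nbytes += (ne0 - 1) * nb0;
    printf("wrapped nbytes = %zu\n", nbytes); /* 18446744073709551612 */

    /* The guard this commit adds to ggml_nbytes(): any non-positive
     * dimension means the tensor holds no data, so report 0 bytes. */
    if (ne0 <= 0) {
        nbytes = 0;
    }
    printf("guarded nbytes = %zu\n", nbytes); /* 0 */
    return 0;
}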
Diffstat (limited to 'ggml/src/ggml.c')
-rw-r--r-- | ggml/src/ggml.c | 10
1 file changed, 10 insertions, 0 deletions
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 8efe2653..80dd25ff 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -4267,6 +4267,9 @@ GGML_CALL int64_t ggml_blck_size(enum ggml_type type) {
 }
 
 GGML_CALL size_t ggml_nbytes(const struct ggml_tensor * tensor) {
+    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
+        if (tensor->ne[i] <= 0) return 0;
+    }
     size_t nbytes;
     size_t blck_size = ggml_blck_size(tensor->type);
     if (blck_size == 1) {
@@ -21480,6 +21483,9 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
 
 #ifdef GGML_USE_OPENMP
     if (n_threads > 1) {
+//#if IK_PRINT_TIMING
+//        int64_t tim1 = ggml_time_us();
+//#endif
         #pragma omp parallel num_threads(n_threads)
         {
             #pragma omp single
@@ -21496,6 +21502,10 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
             };
             ggml_graph_compute_thread(&worker);
         }
+//#if IK_PRINT_TIMING
+//        int64_t tim2 = ggml_time_us();
+//        printf("%s(...): %d us\n", __func__, (int)(tim2-tim1));
+//#endif
     } else {
         struct ggml_compute_state worker = {
             .thrd   = 0,
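
The timing hooks in the diff above are committed commented out. If enabled (for example by defining IK_PRINT_TIMING and removing the // prefixes), the pattern is simply bracketing the OpenMP compute region with ggml's microsecond clock. Here is a minimal sketch of that pattern with a hypothetical do_work() standing in for the parallel region; ggml_time_us() and ggml_time_init() are ggml's real timer API, everything else is illustrative.

#include <stdio.h>
#include <stdint.h>
#include "ggml.h"   /* ggml_time_init(), ggml_time_us() */

#define IK_PRINT_TIMING 1

/* Hypothetical stand-in for the OpenMP graph-compute region. */
static void do_work(void) { /* ... */ }

int main(void) {
    ggml_time_init();  /* set up ggml's timer state once */

#if IK_PRINT_TIMING
    int64_t tim1 = ggml_time_us();
#endif
    do_work();
#if IK_PRINT_TIMING
    int64_t tim2 = ggml_time_us();
    /* Same format string as the commented-out code in the diff. */
    printf("%s(...): %d us\n", __func__, (int)(tim2 - tim1));
#endif
    return 0;
}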