author    Kawrakow <iwankawrakow@gmail.com>          2025-02-25 17:55:58 +0200
committer GitHub <noreply@github.com>                2025-02-25 17:55:58 +0200
commit    94b659a2f106e017e5eeb6f492dc9f290e136833 (patch)
tree      8b744ff19b476f7d08e9691def83ad3fbb27c763 /ggml/src/ggml.c
parent    547eee81d99a2676975a9768166b7d164473b8fa (diff)
Give the user the option to override where model weights are stored (#232)
* Give the user the option to override where model weights are stored
* Fix ggml_nbytes() problem and cleanup
For a tensor with zero elements, ggml_nbytes() was returning
uint64_t::max, which caused graph allocation to fail (see the sketch
after the commit message).
* Add timing info to CUDA graph evaluation
* Add more timing info
---------
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
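
As background on the ggml_nbytes() fix above: ggml computes a tensor's byte size from its dimensions (int64_t ne[]) and byte strides (size_t nb[]). When a dimension has zero elements, the ne[i] - 1 term goes negative, and the usual C arithmetic conversions turn it into a huge unsigned value once it is multiplied by a stride. Below is a minimal standalone sketch of that wraparound on a typical 64-bit platform; the stride value is made up for illustration, and this is not the real ggml_nbytes() code.

#include <stdint.h>
#include <stdio.h>

int main(void) {
    int64_t ne0 = 0;  /* a dimension with zero elements */
    size_t  nb0 = 4;  /* an illustrative byte stride */

    /* Pre-patch style accumulation: (ne0 - 1) is -1; multiplying by the
     * unsigned stride converts it to SIZE_MAX, so the byte count wraps
     * around to a value near uint64_t::max. */
    size_t nbytes = 0;
    nbytes += (ne0 - 1) * nb0;
    printf("wrapped nbytes = %zu\n", nbytes); /* 18446744073709551612 */

    /* The guard this commit adds to ggml_nbytes(): any non-positive
     * dimension means the tensor holds no data, so report 0 bytes. */
    if (ne0 <= 0) {
        nbytes = 0;
    }
    printf("guarded nbytes = %zu\n", nbytes); /* 0 */
    return 0;
}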
Diffstat (limited to 'ggml/src/ggml.c')
-rw-r--r-- | ggml/src/ggml.c | 10
1 file changed, 10 insertions, 0 deletions
diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c
index 8efe2653..80dd25ff 100644
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -4267,6 +4267,9 @@ GGML_CALL int64_t ggml_blck_size(enum ggml_type type) {
 }
 
 GGML_CALL size_t ggml_nbytes(const struct ggml_tensor * tensor) {
+    for (int i = 0; i < GGML_MAX_DIMS; ++i) {
+        if (tensor->ne[i] <= 0) return 0;
+    }
     size_t nbytes;
     size_t blck_size = ggml_blck_size(tensor->type);
     if (blck_size == 1) {
@@ -21480,6 +21483,9 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
 
 #ifdef GGML_USE_OPENMP
     if (n_threads > 1) {
+//#if IK_PRINT_TIMING
+//        int64_t tim1 = ggml_time_us();
+//#endif
         #pragma omp parallel num_threads(n_threads)
         {
             #pragma omp single
@@ -21496,6 +21502,10 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
             };
             ggml_graph_compute_thread(&worker);
         }
+//#if IK_PRINT_TIMING
+//        int64_t tim2 = ggml_time_us();
+//        printf("%s(...): %d us\n", __func__, (int)(tim2-tim1));
+//#endif
     } else {
         struct ggml_compute_state worker = {
             .thrd   = 0,
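
The timing hooks in the diff above are committed commented out. If enabled (for example by defining IK_PRINT_TIMING and removing the // prefixes), the pattern is simply bracketing the OpenMP compute region with ggml's microsecond clock. Here is a minimal sketch of that pattern with a hypothetical do_work() standing in for the parallel region; ggml_time_us() and ggml_time_init() are ggml's real timer API, everything else is illustrative.

#include <stdio.h>
#include <stdint.h>
#include "ggml.h"   /* ggml_time_init(), ggml_time_us() */

#define IK_PRINT_TIMING 1

/* Hypothetical stand-in for the OpenMP graph-compute region. */
static void do_work(void) { /* ... */ }

int main(void) {
    ggml_time_init();  /* set up ggml's timer state once */

#if IK_PRINT_TIMING
    int64_t tim1 = ggml_time_us();
#endif
    do_work();
#if IK_PRINT_TIMING
    int64_t tim2 = ggml_time_us();
    /* Same format string as the commented-out code in the diff. */
    printf("%s(...): %d us\n", __func__, (int)(tim2 - tim1));
#endif
    return 0;
}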