ggml : add context enumeration functions (#3605)

finetune : fix assert failure in ggml-alloc
author: slaren <slarengh@gmail.com> 2023-10-13 12:23:10 +0200
committer: GitHub <noreply@github.com> 2023-10-13 12:23:10 +0200
commit: 424b6381c4daeed62e6600e0402e72f39845b58d (patch)
tree: 4d9b10030dcc34964ce3d3f879f353f49f641715 /examples
parent: 1e0e873c373c33989beb6bc64d83cd572ab7fe2b (diff)
1 file changed, 9 insertions, 10 deletions
diff --git a/examples/finetune/finetune.cpp b/examples/finetune/finetune.cpp
index 9ae4bc19..35824cd2 100644
--- a/examples/finetune/finetune.cpp
+++ b/examples/finetune/finetune.cpp
@@ -529,13 +529,14 @@ static void init_lora(const struct my_llama_model * model, struct my_llama_lora
     set_param_lora(lora);
 
     // measure data size
-    struct ggml_allocr * alloc = NULL;
-    alloc = ggml_allocr_new_measure(tensor_alignment);
-    alloc_lora(alloc, lora);
+    size_t size = 0;
+    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
+        size += GGML_PAD(ggml_nbytes(t), tensor_alignment);
+    }
 
     // allocate data
-    lora->data.resize(ggml_allocr_max_size(alloc) + tensor_alignment);
-    ggml_allocr_free(alloc);
+    struct ggml_allocr * alloc = NULL;
+    lora->data.resize(size + tensor_alignment);
     alloc = ggml_allocr_new(lora->data.data(), lora->data.size(), tensor_alignment);
     alloc_lora(alloc, lora);
     ggml_allocr_free(alloc);
@@ -1714,11 +1715,9 @@ int main(int argc, char ** argv) {
     struct ggml_tensor * target_probs  = ggml_new_tensor_3d(ctx_input, GGML_TYPE_F32, n_vocab,  n_tokens, n_batch);
 
     // measure required memory for input tensors
-    alloc = ggml_allocr_new_measure(tensor_alignment);
-    ggml_allocr_alloc(alloc, tokens_input);
-    ggml_allocr_alloc(alloc, target_probs);
-    size_t max_input_size = ggml_allocr_max_size(alloc) + tensor_alignment;
-    ggml_allocr_free(alloc);
+    size_t max_input_size = GGML_PAD(ggml_nbytes(tokens_input), tensor_alignment) +
+                            GGML_PAD(ggml_nbytes(target_probs), tensor_alignment) +
+                            tensor_alignment;
     printf("%s: input_size = %zu bytes (%.1f MB)\n", __func__, max_input_size, (float) max_input_size / (1024.0f*1024.0f));
 
     // allocate input tensors
author	slaren <slarengh@gmail.com>	2023-10-13 12:23:10 +0200
committer	GitHub <noreply@github.com>	2023-10-13 12:23:10 +0200
commit	424b6381c4daeed62e6600e0402e72f39845b58d (patch)
tree	4d9b10030dcc34964ce3d3f879f353f49f641715 /examples
parent	1e0e873c373c33989beb6bc64d83cd572ab7fe2b (diff)