| author | Georgi Gerganov <ggerganov@gmail.com> | 2024-02-12 09:16:06 +0200 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-12 09:16:06 +0200 |
| commit | 3b169441dfe8e420f88d1592708cc2a871daadb9 (patch) | |
| tree | b554c9eac1b3b7dbf11e364b6a4a748605a6e949 /examples/export-lora/export-lora.cpp | |
| parent | 3bdc4cd0f595a6096cca4a64aa75ffa8a3503465 (diff) | |
sync : ggml (#5452)
* ggml-alloc : v3 (ggml/727)
* ggml-alloc v3
ggml-ci
* fix ci
ggml-ci
* whisper : check for backend buffer allocation failures
* whisper : avoid leaks when initialization fails
* cleanup
ggml-ci
* style fixes
ggml-ci
* sync : ggml
* update llama.cpp, clip.cpp, export-lora.cpp
* update finetune.cpp, train-text-from-scratch.cpp
ggml-ci
* ggml-backend : reduce alignment to 32 to match gguf and fix mmap
---------
Co-authored-by: slaren <slarengh@gmail.com>
Diffstat (limited to 'examples/export-lora/export-lora.cpp')
-rw-r--r-- | examples/export-lora/export-lora.cpp | 19
1 file changed, 5 insertions, 14 deletions
```diff
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index 4cd5d99b..2f7be8a1 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -337,24 +337,14 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int
     params.mem_buffer = NULL;
     params.no_alloc = true;
     struct ggml_context * ctx = NULL;
-    struct ggml_allocr * alloc = NULL;
-    struct ggml_cgraph * gf = NULL;
+    struct ggml_gallocr * alloc = NULL;
+    struct ggml_cgraph * gf = NULL;
     ctx = ggml_init(params);
-    alloc = ggml_allocr_new_measure(tensor_alignment);
+    alloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
     gf = build_graph_lora(ctx, tensor, lora_a, lora_b, scaling);
 
-    size_t alloc_size = ggml_allocr_alloc_graph(alloc, gf);
-    ggml_allocr_free(alloc);
-    ggml_free(ctx);
-
-    static std::vector<uint8_t> data_compute;
-    data_compute.resize(alloc_size + tensor_alignment);
-    ctx = ggml_init(params);
-    alloc = ggml_allocr_new(data_compute.data(), data_compute.size(), tensor_alignment);
-    gf = build_graph_lora(ctx, tensor, lora_a, lora_b, scaling);
-    ggml_allocr_alloc_graph(alloc, gf);
-    ggml_allocr_free(alloc);
+    ggml_gallocr_alloc_graph(alloc, gf);
 
     struct ggml_cplan cplan = ggml_graph_plan(gf, n_threads);
 
     static std::vector<uint8_t> data_work;
@@ -363,6 +353,7 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int
     ggml_graph_compute(gf, &cplan);
 
+    ggml_gallocr_free(alloc);
     ggml_free(ctx);
 
     return true;
 }
```
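For readers following the ggml-alloc v3 migration, the single-pass flow that this diff moves `export-lora.cpp` onto can be sketched end to end. This is a minimal, self-contained illustration assuming the ggml-alloc v3 API synced here (`ggml_gallocr_new`, `ggml_gallocr_alloc_graph`, `ggml_gallocr_free`); the toy graph, tensor shapes, and thread count are invented for the example and are not part of the commit.

```cpp
// Minimal sketch of the ggml-alloc v3 flow used above (toy graph: c = a*b).
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"

#include <vector>

int main() {
    // no_alloc = true: the context holds only tensor/graph metadata;
    // tensor data will live in a buffer owned by the graph allocator
    struct ggml_init_params params = {
        /*.mem_size   =*/ ggml_tensor_overhead()*8 + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 16);
    struct ggml_tensor * c = ggml_mul(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    // v3: one allocator bound to a backend buffer type replaces the old
    // measure allocator plus the manually sized data_compute buffer
    ggml_gallocr_t alloc = ggml_gallocr_new(ggml_backend_cpu_buffer_type());
    ggml_gallocr_alloc_graph(alloc, gf);

    // tensors are backed by real memory only after alloc_graph
    ggml_set_f32(a, 2.0f);
    ggml_set_f32(b, 3.0f);

    struct ggml_cplan cplan = ggml_graph_plan(gf, /*n_threads =*/ 4);
    std::vector<uint8_t> work;
    if (cplan.work_size > 0) {
        work.resize(cplan.work_size);
        cplan.work_data = work.data();
    }
    ggml_graph_compute(gf, &cplan);

    ggml_gallocr_free(alloc);
    ggml_free(ctx);
    return 0;
}
```

Compared with the removed code, there is no separate measure pass: `ggml_gallocr_alloc_graph` sizes the allocator's backend buffer to fit the graph, which is why `apply_lora` can drop the static `data_compute` buffer and the second `ggml_init` round trip.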