sync : ggml (backend v2) (#3912)

* sync : ggml (backend v2) (wip)
* sync : migrate examples and llama.cpp to dynamic graphs (wip)
* sync : update tests + fix max op params to 64 ggml-ci
* sync : ggml-cuda ggml-ci
* llama : fix save/load state context size ggml-ci
* sync : try to fix build on tvOS
* sync : pass custom graph sizes in training examples
* sync : update graph copies to new ggml API
* sync : update sync-ggml.sh with new files
* scripts : fix header in sync script
* train : fix context size calculations
* llama : increase inference graph size up to 4096 nodes
* train : allocate grads for backward graphs
* train : allocate grads for gb_tmp
author: Georgi Gerganov <ggerganov@gmail.com> 2023-11-13 14:16:23 +0200
committer: GitHub <noreply@github.com> 2023-11-13 14:16:23 +0200
commit: 4760e7cc0b68570d58f55e8dda469805d1759d0d (patch)
tree: cd983b1f2833f0094c0539f7943703c6787bf12b /examples/export-lora/export-lora.cpp
parent: bb50a792ec2a49944470c82694fa364345e95170 (diff)
1 files changed, 2 insertions, 2 deletions
diff --git a/examples/export-lora/export-lora.cpp b/examples/export-lora/export-lora.cpp
index d803cfd5..c8754ce7 100644
--- a/examples/export-lora/export-lora.cpp
+++ b/examples/export-lora/export-lora.cpp
@@ -240,7 +240,7 @@ static struct lora_data * load_lora(struct lora_info * info) {
     }
 
     struct ggml_init_params params_ggml;
-    params_ggml.mem_size   = ggml_tensor_overhead() * GGML_MAX_NODES;
+    params_ggml.mem_size   = ggml_tensor_overhead() * GGML_DEFAULT_GRAPH_SIZE;
     params_ggml.mem_buffer = NULL;
     params_ggml.no_alloc   = true;
     result->ctx = ggml_init(params_ggml);
@@ -334,7 +334,7 @@ static bool apply_lora(struct ggml_tensor * tensor, struct lora_data * lora, int
     float scaling = lora->info.scale * (float)lora->lora_alpha / (float)lora->lora_r;
 
     struct ggml_init_params params;
-    params.mem_size   = GGML_OBJECT_SIZE + GGML_GRAPH_SIZE + ggml_tensor_overhead()*4 + GGML_MEM_ALIGN*5;
+    params.mem_size   = GGML_OBJECT_SIZE + ggml_graph_overhead() + ggml_tensor_overhead()*4 + GGML_MEM_ALIGN*5;
     params.mem_buffer = NULL;
     params.no_alloc   = true;
     struct ggml_context * ctx = NULL;
author	Georgi Gerganov <ggerganov@gmail.com>	2023-11-13 14:16:23 +0200
committer	GitHub <noreply@github.com>	2023-11-13 14:16:23 +0200
commit	4760e7cc0b68570d58f55e8dda469805d1759d0d (patch)
tree	cd983b1f2833f0094c0539f7943703c6787bf12b /examples/export-lora/export-lora.cpp
parent	bb50a792ec2a49944470c82694fa364345e95170 (diff)