llama : fix compatibility with old 2 expert models (#6735)

author: slaren <slarengh@gmail.com> 2024-04-18 09:04:47 +0200
committer: GitHub <noreply@github.com> 2024-04-18 10:04:47 +0300
commit: c71bfd736ee99a56e697697b39240f2ee06ed26d (patch)
tree: 0d3de9a53215e34a954c33170da34dd9693ec7aa /llama.cpp
parent: 3b8f1ec4b18770531d0b1d792f3edf08254e4f0c (diff)
1 file changed, 1 insertion, 1 deletion
diff --git a/llama.cpp b/llama.cpp
index f4f4063c..8c144629 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -4592,7 +4592,7 @@ static bool llm_load_tensors(
     size_t ctx_size = ggml_tensor_overhead()*(ml.n_tensors + 1); // +1 for models where tok_embd is duplicated as output
 
     // for moe merged tensors
-    ctx_size += ggml_tensor_overhead()*hparams.n_expert*n_layer;
+    ctx_size += ggml_tensor_overhead()*n_layer*3;
 
     std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
     for (auto & it : buft_layer_count) {
author	slaren <slarengh@gmail.com>	2024-04-18 09:04:47 +0200
committer	GitHub <noreply@github.com>	2024-04-18 10:04:47 +0300
commit	c71bfd736ee99a56e697697b39240f2ee06ed26d (patch)
tree	0d3de9a53215e34a954c33170da34dd9693ec7aa /llama.cpp
parent	3b8f1ec4b18770531d0b1d792f3edf08254e4f0c (diff)