author    | slaren <slarengh@gmail.com> | 2024-01-18 21:12:15 +0100
committer | GitHub <noreply@github.com> | 2024-01-18 21:12:15 +0100
commit    | 96d7f56d2918ffde1995dbb32392571deb76d7fc
tree      | e24ffb65e963d3e88591fac1c34d9e1bbf22d63d /llama.cpp
parent    | 2d5419d08ab1131623e6a1d554607b7663435e87
llama : fix mlock with no-mmap with Metal (#5025)
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
@@ -1599,7 +1599,7 @@ struct llama_model {
     std::unique_ptr<llama_mmap> mapping;
 
     // objects representing data potentially being locked in memory
-    llama_mlock mlock_buf;
+    std::vector<std::unique_ptr<llama_mlock>> mlock_bufs;
     llama_mlock mlock_mmap;
 
     // for quantize-stats only
@@ -3815,8 +3815,10 @@ static bool llm_load_tensors(
         else {
             buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
             if (buf != nullptr && use_mlock && ggml_backend_buffer_is_host(buf)) {
-                model.mlock_buf.init   (ggml_backend_buffer_get_base(buf));
-                model.mlock_buf.grow_to(ggml_backend_buffer_get_size(buf));
+                model.mlock_bufs.emplace_back(new llama_mlock);
+                auto & mlock_buf = model.mlock_bufs.back();
+                mlock_buf->init   (ggml_backend_buffer_get_base(buf));
+                mlock_buf->grow_to(ggml_backend_buffer_get_size(buf));
             }
         }
         if (buf == nullptr) {