author    | slaren <slarengh@gmail.com> | 2024-01-18 21:12:15 +0100
committer | GitHub <noreply@github.com> | 2024-01-18 21:12:15 +0100
commit    | 96d7f56d2918ffde1995dbb32392571deb76d7fc
tree      | e24ffb65e963d3e88591fac1c34d9e1bbf22d63d /llama.cpp
parent    | 2d5419d08ab1131623e6a1d554607b7663435e87
llama : fix mlock with no-mmap with Metal (#5025)
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
@@ -1599,7 +1599,7 @@ struct llama_model {
     std::unique_ptr<llama_mmap> mapping;
 
     // objects representing data potentially being locked in memory
-    llama_mlock mlock_buf;
+    std::vector<std::unique_ptr<llama_mlock>> mlock_bufs;
     llama_mlock mlock_mmap;
 
     // for quantize-stats only
@@ -3815,8 +3815,10 @@ static bool llm_load_tensors(
         else {
             buf = ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft);
             if (buf != nullptr && use_mlock && ggml_backend_buffer_is_host(buf)) {
-                model.mlock_buf.init   (ggml_backend_buffer_get_base(buf));
-                model.mlock_buf.grow_to(ggml_backend_buffer_get_size(buf));
+                model.mlock_bufs.emplace_back(new llama_mlock);
+                auto & mlock_buf = model.mlock_bufs.back();
+                mlock_buf->init   (ggml_backend_buffer_get_base(buf));
+                mlock_buf->grow_to(ggml_backend_buffer_get_size(buf));
             }
         }
         if (buf == nullptr) {