From 3d9a55181603e85a26378a850a14068034e5002d Mon Sep 17 00:00:00 2001 From: Johannes Gäßler Date: Mon, 7 Aug 2023 10:09:40 +0200 Subject: Fixed mmap prefetch for GPU offloading (#2529) --- llama.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'llama.cpp') diff --git a/llama.cpp b/llama.cpp index 83973987..39aefd49 100644 --- a/llama.cpp +++ b/llama.cpp @@ -747,12 +747,12 @@ struct llama_model_loader { void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) { size_t data_size = 0; - size_t prefetch_size = 0; + size_t prefetch_size = file_loader->file.size; size_t lock_size = 0; for (const llama_load_tensor & lt : tensors_map.tensors) { data_size += lt.size; - if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) { - prefetch_size += lt.size; + if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) { + prefetch_size -= lt.size; } } -- cgit v1.2.3