summary refs log tree commit diff
path: root/examples
diff options
context:
space:
mode:
author    M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>  2023-10-14 13:52:44 +0300
committer GitHub <noreply@github.com>                2023-10-14 04:52:44 -0600
commit 11dc1091f64b24ca6d643acc6d0051117ba60161 (patch)
tree   3b4cb73eeedb24c7b8ef97cc8d315ceeb6df9d7e /examples
parent 2a4bcbacead886996f175f33479d1d874a3e577f (diff)
Honor -ngl option for Cuda offloading in llava (#3621)
Diffstat (limited to 'examples')
-rw-r--r--  examples/llava/llava.cpp  8
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 14dacc78..8384d9d7 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -79,7 +79,13 @@ int main(int argc, char ** argv) {
llama_backend_init(params.numa);
- llama_model_params model_params = llama_model_default_params();
+ llama_model_params model_params = llama_model_default_params();
+ model_params.n_gpu_layers = params.n_gpu_layers;
+ model_params.main_gpu = params.main_gpu;
+ model_params.tensor_split = params.tensor_split;
+ model_params.use_mmap = params.use_mmap;
+ model_params.use_mlock = params.use_mlock;
+
llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
if (model == NULL) {
fprintf(stderr , "%s: error: unable to load model\n" , __func__);