diff options
author | M. Yusuf Sarıgöz <yusufsarigoz@gmail.com> | 2023-10-14 13:52:44 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-10-14 04:52:44 -0600 |
commit | 11dc1091f64b24ca6d643acc6d0051117ba60161 (patch) | |
tree | 3b4cb73eeedb24c7b8ef97cc8d315ceeb6df9d7e | |
parent | 2a4bcbacead886996f175f33479d1d874a3e577f (diff) |
Honor -ngl option for Cuda offloading in llava (#3621)
-rw-r--r-- | examples/llava/llava.cpp | 8 |
1 file changed, 7 insertions, 1 deletion
```diff
diff --git a/examples/llava/llava.cpp b/examples/llava/llava.cpp
index 14dacc78..8384d9d7 100644
--- a/examples/llava/llava.cpp
+++ b/examples/llava/llava.cpp
@@ -79,7 +79,13 @@ int main(int argc, char ** argv) {
 
     llama_backend_init(params.numa);
 
-    llama_model_params model_params = llama_model_default_params();
+    llama_model_params model_params = llama_model_default_params();
+    model_params.n_gpu_layers = params.n_gpu_layers;
+    model_params.main_gpu = params.main_gpu;
+    model_params.tensor_split = params.tensor_split;
+    model_params.use_mmap = params.use_mmap;
+    model_params.use_mlock = params.use_mlock;
+
     llama_model * model = llama_load_model_from_file(params.model.c_str(), model_params);
     if (model == NULL) {
         fprintf(stderr , "%s: error: unable to load model\n" , __func__);
```