cuda : rename build flag to LLAMA_CUDA (#6299)

author: slaren <slarengh@gmail.com> 2024-03-26 01:16:01 +0100
committer: GitHub <noreply@github.com> 2024-03-26 01:16:01 +0100
commit: 280345968dabc00d212d43e31145f5c9961a7604 (patch)
tree: 4d0ada8b59a4c15cb6d4fe1a6b4740a30dcdb0f2 /examples/llava
parent: b06c16ef9f81d84da520232c125d4d8a1d273736 (diff)
2 files changed, 3 insertions, 3 deletions
diff --git a/examples/llava/MobileVLM-README.md b/examples/llava/MobileVLM-README.md
index 4d5fef02..b3b66331 100644
--- a/examples/llava/MobileVLM-README.md
+++ b/examples/llava/MobileVLM-README.md
@@ -124,7 +124,7 @@ llama_print_timings:       total time =   34570.79 ms
 ## Orin compile and run
 ### compile
 ```sh
-make LLAMA_CUBLAS=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
+make LLAMA_CUDA=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
 ```
 
 ### run on Orin
diff --git a/examples/llava/clip.cpp b/examples/llava/clip.cpp
index 48caafa8..40c97626 100644
--- a/examples/llava/clip.cpp
+++ b/examples/llava/clip.cpp
@@ -7,7 +7,7 @@
 #include "ggml-alloc.h"
 #include "ggml-backend.h"
 
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
 #include "ggml-cuda.h"
 #endif
 
@@ -968,7 +968,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
     }
 
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CUDA
     new_clip->backend = ggml_backend_cuda_init(0);
     printf("%s: CLIP using CUDA backend\n", __func__);
 #endif
author	slaren <slarengh@gmail.com>	2024-03-26 01:16:01 +0100
committer	GitHub <noreply@github.com>	2024-03-26 01:16:01 +0100
commit	280345968dabc00d212d43e31145f5c9961a7604 (patch)
tree	4d0ada8b59a4c15cb6d4fe1a6b4740a30dcdb0f2 /examples/llava
parent	b06c16ef9f81d84da520232c125d4d8a1d273736 (diff)