cuda : rename build flag to LLAMA_CUDA (#6299)

author: slaren <slarengh@gmail.com> 2024-03-26 01:16:01 +0100
committer: GitHub <noreply@github.com> 2024-03-26 01:16:01 +0100
commit: 280345968dabc00d212d43e31145f5c9961a7604 (patch)
tree: 4d0ada8b59a4c15cb6d4fe1a6b4740a30dcdb0f2 /scripts/pod-llama.sh
parent: b06c16ef9f81d84da520232c125d4d8a1d273736 (diff)
1 files changed, 8 insertions, 8 deletions
diff --git a/scripts/pod-llama.sh b/scripts/pod-llama.sh
index 6cf1ab4f..2058ceab 100644
--- a/scripts/pod-llama.sh
+++ b/scripts/pod-llama.sh
@@ -42,7 +42,7 @@ git clone https://github.com/ggerganov/llama.cpp
 
 cd llama.cpp
 
-LLAMA_CUBLAS=1 make -j
+LLAMA_CUDA=1 make -j
 
 ln -sfn /workspace/TinyLlama-1.1B-Chat-v0.3  ./models/tinyllama-1b
 ln -sfn /workspace/CodeLlama-7b-hf           ./models/codellama-7b
@@ -60,7 +60,7 @@ cd /workspace/llama.cpp
 mkdir build-cublas
 cd build-cublas
 
-cmake -DLLAMA_CUBLAS=1 ../
+cmake -DLLAMA_CUDA=1 ../
 make -j
 
 if [ "$1" -eq "0" ]; then
@@ -186,17 +186,17 @@ if [ "$1" -eq "1" ]; then
     # batched
     cd /workspace/llama.cpp
 
-    LLAMA_CUBLAS=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
+    LLAMA_CUDA=1 make -j && ./batched ./models/tinyllama-1b/ggml-model-f16.gguf "Hello, my name is" 8 128 999
 
     # batched-bench
     cd /workspace/llama.cpp
 
-    LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
+    LLAMA_CUDA=1 make -j && ./batched-bench ./models/tinyllama-1b/ggml-model-f16.gguf 4608 1 99 0 512 128 1,2,3,4,5,6,7,8,16,32
 
     # parallel
     cd /workspace/llama.cpp
 
-    LLAMA_CUBLAS=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
+    LLAMA_CUDA=1 make -j && ./parallel -m ./models/tinyllama-1b/ggml-model-f16.gguf -t 1 -ngl 100 -c 4096 -b 512 -s 1 -np 8 -ns 128 -n 100 -cb
 
 fi
 
@@ -204,10 +204,10 @@ fi
 #if [ "$1" -eq "7" ]; then
 #    cd /workspace/llama.cpp
 #
-#    LLAMA_CUBLAS=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
+#    LLAMA_CUDA=1 make -j && ./speculative -m ./models/codellama-34b-instruct/ggml-model-f16.gguf -md ./models/codellama-7b-instruct/ggml-model-q4_0.gguf -p "# Dijkstra's shortest path algorithm in Python (4 spaces indentation) + complexity analysis:\n\n" -e -ngl 999 -ngld 999 -t 4 -n 512 -c 4096 -s 21 --draft 16 -np 1 --temp 0.0
 #fi
 
 # more benches
-#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf  4096 1 99 1 512,3200 128,128,800 1
-#LLAMA_CUBLAS=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
+#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-7b/ggml-model-q4_k.gguf  4096 1 99 1 512,3200 128,128,800 1
+#LLAMA_CUDA=1 make -j && ./batched-bench ./models/codellama-13b/ggml-model-q4_k.gguf 4096 1 99 1 512,3200 128,128,800 1
author	slaren <slarengh@gmail.com>	2024-03-26 01:16:01 +0100
committer	GitHub <noreply@github.com>	2024-03-26 01:16:01 +0100
commit	280345968dabc00d212d43e31145f5c9961a7604 (patch)
tree	4d0ada8b59a4c15cb6d4fe1a6b4740a30dcdb0f2 /scripts/pod-llama.sh
parent	b06c16ef9f81d84da520232c125d4d8a1d273736 (diff)