summary | refs | log | tree | commit | diff
path: root/examples/sycl/run-llama2.sh
diff options
context:
space:
mode:
Diffstat (limited to 'examples/sycl/run-llama2.sh')
-rwxr-xr-x examples/sycl/run-llama2.sh 16
1 files changed, 13 insertions, 3 deletions
diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh
index 52f7c01a..c979a52f 100755
--- a/examples/sycl/run-llama2.sh
+++ b/examples/sycl/run-llama2.sh
@@ -9,18 +9,28 @@ source /opt/intel/oneapi/setvars.sh
if [ $# -gt 0 ]; then
GGML_SYCL_DEVICE=$1
+ GGML_SYCL_SINGLE_GPU=1
else
GGML_SYCL_DEVICE=0
fi
-echo "use $GGML_SYCL_DEVICE as main GPU"
+
#export GGML_SYCL_DEBUG=1
#ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer.
-#use all GPUs with same max compute units
-ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
+if [ "${GGML_SYCL_SINGLE_GPU:-0}" -eq 1 ]; then
+ echo "use $GGML_SYCL_DEVICE as main GPU"
+ #use single GPU only: run on the device given by -mg, with -sm none (no split mode)
+ ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg "$GGML_SYCL_DEVICE" -sm none
+else
+ #use multiple GPUs with same max compute units (taken when GGML_SYCL_SINGLE_GPU is unset or not 1)
+ ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
+fi
#use main GPU only
#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
+#use multiple GPUs with same max compute units
+#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
+