diff options
Diffstat (limited to 'examples')
-rwxr-xr-x | examples/sycl/build.sh | 5 | ||||
-rwxr-xr-x | examples/sycl/run-llama2.sh | 16 |
2 files changed, 17 insertions, 4 deletions
diff --git a/examples/sycl/build.sh b/examples/sycl/build.sh index 26ad2f7d..f20391d7 100755 --- a/examples/sycl/build.sh +++ b/examples/sycl/build.sh @@ -13,8 +13,11 @@ source /opt/intel/oneapi/setvars.sh #for FP32 cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -#build example/main only +#build example/main #cmake --build . --config Release --target main +#build example/llama-bench +#cmake --build . --config Release --target llama-bench + #build all binary cmake --build . --config Release -v diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh index 52f7c01a..c979a52f 100755 --- a/examples/sycl/run-llama2.sh +++ b/examples/sycl/run-llama2.sh @@ -9,18 +9,28 @@ source /opt/intel/oneapi/setvars.sh if [ $# -gt 0 ]; then GGML_SYCL_DEVICE=$1 + GGML_SYCL_SINGLE_GPU=1 else GGML_SYCL_DEVICE=0 fi -echo "use $GGML_SYCL_DEVICE as main GPU" + #export GGML_SYCL_DEBUG=1 #ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer. -#use all GPUs with same max compute units -ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 +if [ $GGML_SYCL_SINGLE_GPU -eq 1 ]; then + echo "use $GGML_SYCL_DEVICE as main GPU" + #use signle GPU only + ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none +else + #use multiple GPUs with same max compute units + ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 +fi #use main GPU only #ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none +#use multiple GPUs with same max compute units +#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 + |