diff options
author | Neo Zhang Jianyu <jianyu.zhang@intel.com> | 2024-03-02 19:49:30 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-02 19:49:30 +0800 |
commit | 715641391dda1ff9762dc5d99d9a30acce99f2c6 (patch) | |
tree | e57b359034b61f8d3ea4de372c2c3c0ec885c943 /examples | |
parent | 9bf297a02bfbd474e51912409a470dd797e2fe13 (diff) |
Support multiple GPUs (split mode) on SYCL backend (#5806)
* support multiple cards: split-mode - layer|row
* rm warning
* rebase with master, support two new OPs, close feature for -sm=row, fix for unit test
* update news
* fix merge error
* update according to review comments
Diffstat (limited to 'examples')
-rw-r--r-- | examples/llama-bench/llama-bench.cpp | 17 | ||||
-rw-r--r-- | examples/sycl/ls-sycl-device.cpp | 2 | ||||
-rwxr-xr-x | examples/sycl/run-llama2.sh | 17 |
3 files changed, 19 insertions, 17 deletions
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index c2155b2a..aa79d002 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -123,20 +123,15 @@ static std::string get_gpu_info() { } #endif #ifdef GGML_USE_SYCL - int device_list[GGML_SYCL_MAX_DEVICES]; - ggml_sycl_get_gpu_list(device_list, GGML_SYCL_MAX_DEVICES); - - for (int i = 0; i < GGML_SYCL_MAX_DEVICES; i++) { - if (device_list[i] >0 ){ - char buf[128]; - ggml_sycl_get_device_description(i, buf, sizeof(buf)); - id += buf; + int count = ggml_backend_sycl_get_device_count(); + for (int i = 0; i < count; i++) { + char buf[128]; + ggml_sycl_get_device_description(i, buf, sizeof(buf)); + id += buf; + if (i < count - 1) { id += "/"; } } - if (id.length() >2 ) { - id.pop_back(); - } #endif // TODO: other backends return id; diff --git a/examples/sycl/ls-sycl-device.cpp b/examples/sycl/ls-sycl-device.cpp index 52442e4c..74a8b7fd 100644 --- a/examples/sycl/ls-sycl-device.cpp +++ b/examples/sycl/ls-sycl-device.cpp @@ -7,7 +7,7 @@ #include "ggml-sycl.h" -int main(int argc, char ** argv) { +int main() { ggml_backend_sycl_print_sycl_devices(); return 0; } diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh index f5f4c1e9..52f7c01a 100755 --- a/examples/sycl/run-llama2.sh +++ b/examples/sycl/run-llama2.sh @@ -8,12 +8,19 @@ INPUT2="Building a website can be done in 10 simple steps:\nStep 1:" source /opt/intel/oneapi/setvars.sh if [ $# -gt 0 ]; then - export GGML_SYCL_DEVICE=$1 + GGML_SYCL_DEVICE=$1 else - export GGML_SYCL_DEVICE=0 + GGML_SYCL_DEVICE=0 fi -echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE +echo "use $GGML_SYCL_DEVICE as main GPU" #export GGML_SYCL_DEBUG=1 -./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -#./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 5 -e -ngl 33 -t 1 -s 0 + + +#ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by 
sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer. + +#use all GPUs with same max compute units +ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 + +#use main GPU only +#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none |