summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rw-r--r--examples/CMakeLists.txt3
-rw-r--r--examples/server/server.cpp6
-rw-r--r--examples/sycl/CMakeLists.txt9
-rw-r--r--examples/sycl/README.md47
-rwxr-xr-xexamples/sycl/build.sh20
-rw-r--r--examples/sycl/ls-sycl-device.cpp11
-rwxr-xr-xexamples/sycl/run-llama2.sh19
7 files changed, 112 insertions, 3 deletions
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index f67d74c5..68ad8996 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -23,6 +23,9 @@ else()
add_subdirectory(infill)
add_subdirectory(llama-bench)
add_subdirectory(llava)
+ if (LLAMA_SYCL)
+ add_subdirectory(sycl)
+ endif()
add_subdirectory(main)
add_subdirectory(tokenize)
add_subdirectory(parallel)
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index f58a2aca..a48582ad 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2099,7 +2099,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
invalid_param = true;
break;
}
-#ifdef GGML_USE_CUBLAS
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
std::string arg_next = argv[i];
// split string by , and /
@@ -2125,7 +2125,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
}
else if (arg == "--no-mul-mat-q" || arg == "-nommq")
{
-#ifdef GGML_USE_CUBLAS
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
params.mul_mat_q = false;
#else
LOG_WARNING("warning: llama.cpp was compiled without cuBLAS. Disabling mul_mat_q kernels has no effect.\n", {});
@@ -2138,7 +2138,7 @@ static void server_params_parse(int argc, char **argv, server_params &sparams,
invalid_param = true;
break;
}
-#ifdef GGML_USE_CUBLAS
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)
params.main_gpu = std::stoi(argv[i]);
#else
LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.", {});
diff --git a/examples/sycl/CMakeLists.txt b/examples/sycl/CMakeLists.txt
new file mode 100644
index 00000000..69cf8932
--- /dev/null
+++ b/examples/sycl/CMakeLists.txt
@@ -0,0 +1,9 @@
+# MIT license
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: MIT
+
+set(TARGET ls-sycl-device)
+add_executable(${TARGET} ls-sycl-device.cpp)
+install(TARGETS ${TARGET} RUNTIME)
+target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
+target_compile_features(${TARGET} PRIVATE cxx_std_17)
diff --git a/examples/sycl/README.md b/examples/sycl/README.md
new file mode 100644
index 00000000..b46f17f3
--- /dev/null
+++ b/examples/sycl/README.md
@@ -0,0 +1,47 @@
+# llama.cpp/example/sycl
+
+This example program provide the tools for llama.cpp for SYCL on Intel GPU.
+
+## Tool
+
+|Tool Name| Function|Status|
+|-|-|-|
+|ls-sycl-device| List all SYCL devices with ID, compute capability, max work group size, ect.|Support|
+
+### ls-sycl-device
+
+List all SYCL devices with ID, compute capability, max work group size, ect.
+
+1. Build the llama.cpp for SYCL for all targets.
+
+2. Enable oneAPI running environment
+
+```
+source /opt/intel/oneapi/setvars.sh
+```
+
+3. Execute
+
+```
+./build/bin/ls-sycl-device
+```
+
+Check the ID in startup log, like:
+
+```
+found 4 SYCL devices:
+ Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
+ Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
+ max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
+ Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
+ max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
+ Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
+ max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
+
+```
+
+|Attribute|Note|
+|-|-|
+|compute capability 1.3|Level-zero running time, recommended |
+|compute capability 3.0|OpenCL running time, slower than level-zero in most cases|
diff --git a/examples/sycl/build.sh b/examples/sycl/build.sh
new file mode 100755
index 00000000..26ad2f7d
--- /dev/null
+++ b/examples/sycl/build.sh
@@ -0,0 +1,20 @@
+
+# MIT license
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: MIT
+
+mkdir -p build
+cd build
+source /opt/intel/oneapi/setvars.sh
+
+#for FP16
+#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
+
+#for FP32
+cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
+#build example/main only
+#cmake --build . --config Release --target main
+
+#build all binary
+cmake --build . --config Release -v
diff --git a/examples/sycl/ls-sycl-device.cpp b/examples/sycl/ls-sycl-device.cpp
new file mode 100644
index 00000000..42847154
--- /dev/null
+++ b/examples/sycl/ls-sycl-device.cpp
@@ -0,0 +1,11 @@
+/*MIT license
+ Copyright (C) 2024 Intel Corporation
+ SPDX-License-Identifier: MIT
+*/
+
+#include "ggml-sycl.h"
+
+int main(int argc, char ** argv) {
+ ggml_backend_sycl_print_sycl_devices();
+ return 0;
+}
diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh
new file mode 100755
index 00000000..f5f4c1e9
--- /dev/null
+++ b/examples/sycl/run-llama2.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# MIT license
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: MIT
+
+INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
+source /opt/intel/oneapi/setvars.sh
+
+if [ $# -gt 0 ]; then
+ export GGML_SYCL_DEVICE=$1
+else
+ export GGML_SYCL_DEVICE=0
+fi
+echo GGML_SYCL_DEVICE=$GGML_SYCL_DEVICE
+#export GGML_SYCL_DEBUG=1
+./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
+#./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 5 -e -ngl 33 -t 1 -s 0
+