summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CMakeLists.txt7
-rw-r--r--CMakePresets.json31
-rw-r--r--README-sycl.md30
-rw-r--r--examples/sycl/win-build-sycl.bat6
-rw-r--r--ggml-sycl.cpp2
-rw-r--r--ggml-sycl/dpct/helper.hpp414
-rw-r--r--ggml.h6
7 files changed, 241 insertions, 255 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c90414af..9cfe08d7 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -665,6 +665,7 @@ if (LLAMA_SYCL)
#todo: AOT
find_package(IntelSYCL REQUIRED)
+ find_package(MKL REQUIRED)
message(STATUS "SYCL found")
@@ -679,11 +680,9 @@ if (LLAMA_SYCL)
endif()
add_compile_options(-I./) #include DPCT
- add_compile_options(-I/${SYCL_INCLUDE_DIR})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda")
endif()
@@ -693,8 +692,10 @@ if (LLAMA_SYCL)
list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp")
if (WIN32)
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib)
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} IntelSYCL::SYCL_CXX MKL::MKL MKL::MKL_SYCL)
else()
+ add_compile_options(-I/${SYCL_INCLUDE_DIR})
+ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib")
if (LLAMA_SYCL_TARGET STREQUAL "INTEL")
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread)
elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA")
diff --git a/CMakePresets.json b/CMakePresets.json
index e2b7a79e..fba22af9 100644
--- a/CMakePresets.json
+++ b/CMakePresets.json
@@ -11,9 +11,21 @@
"CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
}
},
-
+ {
+ "name": "sycl-base",
+ "hidden": true,
+ "generator": "Ninja",
+ "binaryDir": "${sourceDir}/build-${presetName}",
+ "cacheVariables": {
+ "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+ "CMAKE_CXX_COMPILER": "icx",
+ "LLAMA_SYCL": "ON",
+ "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+ }
+ },
{ "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
- { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
+ { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
+ { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
{ "name": "static", "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } },
{
@@ -35,15 +47,18 @@
},
{ "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
- { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "release" ] },
- { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "release", "static" ] },
+ { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
+ { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
{ "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
- { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "release" ] },
- { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "release", "static" ] },
+ { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
+ { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
{ "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
- { "name": "x64-windows-msvc-release", "inherits": [ "base", "release" ] },
- { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "release", "static" ] }
+ { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
+ { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
+
+ { "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
+ { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
]
}
diff --git a/README-sycl.md b/README-sycl.md
index bd198470..b7e2bb12 100644
--- a/README-sycl.md
+++ b/README-sycl.md
@@ -410,15 +410,9 @@ Output (example):
4. Install build tools
-a. Download & install cmake for Windows: https://cmake.org/download/
+a. Download & install cmake for Windows: https://cmake.org/download/ (CMake can also be installed from Visual Studio Installer)
+b. The new Visual Studio will install Ninja as default. (If not, please install it manually: https://ninja-build.org/)
-b. Download & install mingw-w64 make for Windows provided by w64devkit
-
-- Download the 1.19.0 version of [w64devkit](https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip).
-
-- Extract `w64devkit` on your pc.
-
-- Add the **bin** folder path in the Windows system PATH environment (for e.g. `C:\xxx\w64devkit\bin\`).
### II. Build llama.cpp
@@ -428,10 +422,10 @@ On the oneAPI command line window, step into the llama.cpp main directory and ru
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
+cmake -B build -G "Ninja" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
# Option 2: Or FP16
-cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+cmake -B build -G "Ninja" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
cmake --build build --config Release -j
```
@@ -441,9 +435,23 @@ Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former in
.\examples\sycl\win-build-sycl.bat
```
+Or, use CMake presets to build:
+```sh
+cmake --preset x64-windows-sycl-release
+cmake --build build-x64-windows-sycl-release -j --target llama-cli
+
+cmake -DLLAMA_SYCL_F16=ON --preset x64-windows-sycl-release
+cmake --build build-x64-windows-sycl-release -j --target llama-cli
+
+cmake --preset x64-windows-sycl-debug
+cmake --build build-x64-windows-sycl-debug -j --target llama-cli
+```
+
+Or, you can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project.
+
*Notes:*
-- By default, calling `make` will build all target binary files. In case of a minimal experimental setup, the user can build the inference executable only through `make llama-cli`.
+- In case of a minimal experimental setup, the user can build the inference executable only through `cmake --build build --config Release -j --target llama-cli`.
### III. Run the inference
diff --git a/examples/sycl/win-build-sycl.bat b/examples/sycl/win-build-sycl.bat
index b8037aae..027173b0 100644
--- a/examples/sycl/win-build-sycl.bat
+++ b/examples/sycl/win-build-sycl.bat
@@ -13,16 +13,16 @@ if %errorlevel% neq 0 goto ERROR
:: for FP16
:: faster for long-prompt inference
-:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
:: for FP32
-cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
+cmake -G "Ninja" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
if %errorlevel% neq 0 goto ERROR
:: build example/main only
:: make main
:: build all binary
-make -j
+cmake --build . -j
if %errorlevel% neq 0 goto ERROR
cd ..
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index 485f06ad..e5ddf4a3 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -4911,7 +4911,7 @@ static void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor *sr
GGML_ASSERT(ggml_nbytes(src0) <= INT_MAX);
GGML_ASSERT(ggml_nbytes(src1) <= INT_MAX);
- GGML_TENSOR_BINARY_OP_LOCALS;
+ GGML_TENSOR_BINARY_OP_LOCALS01;
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
queue_ptr main_stream = ctx.stream();
diff --git a/ggml-sycl/dpct/helper.hpp b/ggml-sycl/dpct/helper.hpp
index 017fd6ee..1ff29721 100644
--- a/ggml-sycl/dpct/helper.hpp
+++ b/ggml-sycl/dpct/helper.hpp
@@ -588,266 +588,222 @@ namespace dpct
out = prop;
}
- /// dpct device extension
- class device_ext : public sycl::device
- {
- typedef std::mutex mutex_type;
-
- public:
- device_ext() : sycl::device(), _ctx(*this) {}
- ~device_ext()
- {
- std::lock_guard<mutex_type> lock(m_mutex);
- clear_queues();
- }
- device_ext(const sycl::device &base) : sycl::device(base), _ctx(*this)
- {
- std::lock_guard<mutex_type> lock(m_mutex);
- init_queues();
- }
-
- int is_native_atomic_supported() { return 0; }
- int get_major_version() const
- {
- return dpct::get_major_version(*this);
- }
-
- int get_minor_version() const
- {
- return dpct::get_minor_version(*this);
- }
-
- int get_max_compute_units() const
- {
- return get_device_info().get_max_compute_units();
- }
-
- /// Return the maximum clock frequency of this device in KHz.
- int get_max_clock_frequency() const
- {
- return get_device_info().get_max_clock_frequency();
- }
-
- int get_integrated() const { return get_device_info().get_integrated(); }
-
- int get_max_sub_group_size() const
- {
- return get_device_info().get_max_sub_group_size();
- }
-
- int get_max_register_size_per_work_group() const
- {
- return get_device_info().get_max_register_size_per_work_group();
- }
-
- int get_max_work_group_size() const
- {
- return get_device_info().get_max_work_group_size();
- }
-
- int get_mem_base_addr_align() const
- {
- return get_info<sycl::info::device::mem_base_addr_align>();
- }
-
- size_t get_global_mem_size() const
- {
- return get_device_info().get_global_mem_size();
- }
-
- size_t get_max_mem_alloc_size() const
- {
- return get_device_info().get_max_mem_alloc_size();
- }
-
- /// Get the number of bytes of free and total memory on the SYCL device.
- /// \param [out] free_memory The number of bytes of free memory on the SYCL device.
- /// \param [out] total_memory The number of bytes of total memory on the SYCL device.
- void get_memory_info(size_t &free_memory, size_t &total_memory)
- {
- total_memory = get_device_info().get_global_mem_size();
- const char *warning_info = "get_memory_info: [warning] ext_intel_free_memory is not "
- "supported (export/set ZES_ENABLE_SYSMAN=1 to support), "
- "use total memory as free memory";
+ /// dpct device extension
+ class device_ext : public sycl::device {
+ typedef std::mutex mutex_type;
+
+ public:
+ device_ext() : sycl::device() {}
+ ~device_ext() {
+ std::lock_guard<mutex_type> lock(m_mutex);
+ clear_queues();
+ }
+ device_ext(const sycl::device &base) : sycl::device(base) {
+ std::lock_guard<mutex_type> lock(m_mutex);
+ init_queues();
+ }
+
+ int is_native_atomic_supported() { return 0; }
+ int get_major_version() const { return dpct::get_major_version(*this); }
+
+ int get_minor_version() const { return dpct::get_minor_version(*this); }
+
+ int get_max_compute_units() const {
+ return get_device_info().get_max_compute_units();
+ }
+
+ /// Return the maximum clock frequency of this device in KHz.
+ int get_max_clock_frequency() const {
+ return get_device_info().get_max_clock_frequency();
+ }
+
+ int get_integrated() const { return get_device_info().get_integrated(); }
+
+ int get_max_sub_group_size() const {
+ return get_device_info().get_max_sub_group_size();
+ }
+
+ int get_max_register_size_per_work_group() const {
+ return get_device_info().get_max_register_size_per_work_group();
+ }
+
+ int get_max_work_group_size() const {
+ return get_device_info().get_max_work_group_size();
+ }
+
+ int get_mem_base_addr_align() const {
+ return get_info<sycl::info::device::mem_base_addr_align>();
+ }
+
+ size_t get_global_mem_size() const {
+ return get_device_info().get_global_mem_size();
+ }
+
+ size_t get_max_mem_alloc_size() const {
+ return get_device_info().get_max_mem_alloc_size();
+ }
+
+ /// Get the number of bytes of free and total memory on the SYCL device.
+ /// \param [out] free_memory The number of bytes of free memory on the
+ /// SYCL device. \param [out] total_memory The number of bytes of total
+ /// memory on the SYCL device.
+ void get_memory_info(size_t &free_memory, size_t &total_memory) {
+ total_memory = get_device_info().get_global_mem_size();
+ const char *warning_info =
+ "get_memory_info: [warning] ext_intel_free_memory is not "
+ "supported (export/set ZES_ENABLE_SYSMAN=1 to support), "
+ "use total memory as free memory";
#if (defined(__SYCL_COMPILER_VERSION) && __SYCL_COMPILER_VERSION >= 20221105)
- if (!has(sycl::aspect::ext_intel_free_memory))
- {
- std::cerr << warning_info << std::endl;
- free_memory = total_memory;
- }
- else
- {
- free_memory = get_info<sycl::ext::intel::info::device::free_memory>();
- }
+ if (!has(sycl::aspect::ext_intel_free_memory)) {
+ std::cerr << warning_info << std::endl;
+ free_memory = total_memory;
+ } else {
+ free_memory = get_info<sycl::ext::intel::info::device::free_memory>();
+ }
#else
- std::cerr << warning_info << std::endl;
- free_memory = total_memory;
+ std::cerr << warning_info << std::endl;
+ free_memory = total_memory;
#if defined(_MSC_VER) && !defined(__clang__)
#pragma message("Querying the number of bytes of free memory is not supported")
#else
#warning "Querying the number of bytes of free memory is not supported"
#endif
#endif
- }
+ }
- void get_device_info(device_info &out) const
- {
- dpct::get_device_info(out, *this);
- }
+ void get_device_info(device_info &out) const {
+ dpct::get_device_info(out, *this);
+ }
- device_info get_device_info() const
- {
- device_info prop;
- dpct::get_device_info(prop, *this);
- return prop;
- }
+ device_info get_device_info() const {
+ device_info prop;
+ dpct::get_device_info(prop, *this);
+ return prop;
+ }
- void reset()
- {
- std::lock_guard<mutex_type> lock(m_mutex);
- clear_queues();
- init_queues();
- }
+ void reset() {
+ std::lock_guard<mutex_type> lock(m_mutex);
+ clear_queues();
+ init_queues();
+ }
- sycl::queue &in_order_queue() { return *_q_in_order; }
+ sycl::queue &in_order_queue() { return _q_in_order; }
- sycl::queue &out_of_order_queue() { return *_q_out_of_order; }
+ sycl::queue &out_of_order_queue() { return _q_out_of_order; }
- sycl::queue &default_queue()
- {
- return in_order_queue();
- }
+ sycl::queue &default_queue() { return in_order_queue(); }
- void queues_wait_and_throw()
- {
- std::unique_lock<mutex_type> lock(m_mutex);
- std::vector<std::shared_ptr<sycl::queue>> current_queues(
- _queues);
- lock.unlock();
- for (const auto &q : current_queues)
- {
- q->wait_and_throw();
- }
- // Guard the destruct of current_queues to make sure the ref count is safe.
- lock.lock();
+ void queues_wait_and_throw() {
+ std::unique_lock<mutex_type> lock(m_mutex);
+ lock.unlock();
+ for (auto &q : _queues) {
+ q.wait_and_throw();
}
+ // Guard the destruct of current_queues to make sure the ref count is
+ // safe.
+ lock.lock();
+ }
- sycl::queue *create_queue(bool enable_exception_handler = false)
- {
- return create_in_order_queue(enable_exception_handler);
- }
+ sycl::queue create_queue(bool enable_exception_handler = false) {
+ return create_in_order_queue(enable_exception_handler);
+ }
- sycl::queue *create_queue(sycl::context context, sycl::device device,
- bool enable_exception_handler = false) {
- return create_in_order_queue(context, device, enable_exception_handler);
- }
+ sycl::queue create_queue(sycl::device device,
+ bool enable_exception_handler = false) {
+ return create_in_order_queue(device, enable_exception_handler);
+ }
- sycl::queue *create_in_order_queue(bool enable_exception_handler = false) {
- std::lock_guard<mutex_type> lock(m_mutex);
- return create_queue_impl(enable_exception_handler,
- sycl::property::queue::in_order());
- }
+ sycl::queue create_in_order_queue(bool enable_exception_handler = false) {
+ std::lock_guard<mutex_type> lock(m_mutex);
+ return create_queue_impl(enable_exception_handler,
+ sycl::property::queue::in_order());
+ }
- sycl::queue *create_in_order_queue(sycl::context context, sycl::device device,
+ sycl::queue create_in_order_queue(sycl::device device,
bool enable_exception_handler = false) {
- std::lock_guard<mutex_type> lock(m_mutex);
- return create_queue_impl(context, device, enable_exception_handler,
- sycl::property::queue::in_order());
- }
-
- sycl::queue *create_out_of_order_queue(bool enable_exception_handler = false) {
- std::lock_guard<mutex_type> lock(m_mutex);
- return create_queue_impl(enable_exception_handler);
- }
-
- void destroy_queue(sycl::queue *&queue)
- {
- std::lock_guard<mutex_type> lock(m_mutex);
- _queues.erase(std::remove_if(_queues.begin(), _queues.end(),
- [=](const std::shared_ptr<sycl::queue> &q) -> bool
- {
- return q.get() == queue;
- }),
- _queues.end());
- queue = nullptr;
- }
- void set_saved_queue(sycl::queue *q)
- {
- std::lock_guard<mutex_type> lock(m_mutex);
- _saved_queue = q;
- }
- sycl::queue *get_saved_queue() const
- {
- std::lock_guard<mutex_type> lock(m_mutex);
- return _saved_queue;
- }
- sycl::context get_context() const { return _ctx; }
-
- private:
- void clear_queues()
- {
- _queues.clear();
- _q_in_order = _q_out_of_order = _saved_queue = nullptr;
- }
-
- void init_queues()
- {
- _q_in_order = create_queue_impl(true, sycl::property::queue::in_order());
- _q_out_of_order = create_queue_impl(true);
- _saved_queue = &default_queue();
+ std::lock_guard<mutex_type> lock(m_mutex);
+ return create_queue_impl(device, enable_exception_handler,
+ sycl::property::queue::in_order());
+ }
+
+ sycl::queue create_out_of_order_queue(
+ bool enable_exception_handler = false) {
+ std::lock_guard<mutex_type> lock(m_mutex);
+ return create_queue_impl(enable_exception_handler);
+ }
+
+ void destroy_queue(sycl::queue queue) {
+ std::lock_guard<mutex_type> lock(m_mutex);
+ _queues.clear();
+ }
+ void set_saved_queue(sycl::queue q) {
+ std::lock_guard<mutex_type> lock(m_mutex);
+ _saved_queue = q;
+ }
+ sycl::queue get_saved_queue() const {
+ std::lock_guard<mutex_type> lock(m_mutex);
+ return _saved_queue;
+ }
+
+ private:
+ void clear_queues() { _queues.clear(); }
+
+ void init_queues() {
+ _q_in_order =
+ create_queue_impl(true, sycl::property::queue::in_order());
+ _q_out_of_order = create_queue_impl(true);
+ _saved_queue = default_queue();
+ }
+
+ /// Caller should acquire resource \p m_mutex before calling this
+ /// function.
+ template <class... Properties>
+ sycl::queue create_queue_impl(bool enable_exception_handler,
+ Properties... properties) {
+ sycl::async_handler eh = {};
+ if (enable_exception_handler) {
+ eh = exception_handler;
}
-
- /// Caller should acquire resource \p m_mutex before calling this function.
- template <class... Properties>
- sycl::queue *create_queue_impl(bool enable_exception_handler,
- Properties... properties)
- {
- sycl::async_handler eh = {};
- if (enable_exception_handler)
- {
- eh = exception_handler;
- }
- _queues.push_back(std::make_shared<sycl::queue>(
- _ctx, *this, eh,
- sycl::property_list(
+ auto q = sycl::queue(*this, eh,
+ sycl::property_list(
#ifdef DPCT_PROFILING_ENABLED
- sycl::property::queue::enable_profiling(),
+ sycl::property::queue::enable_profiling(),
#endif
- properties...)));
+ properties...));
+ _queues.push_back(q);
- return _queues.back().get();
- }
+ return _queues.back();
+ }
- template <class... Properties>
- sycl::queue *create_queue_impl(sycl::context context, sycl::device device,
+ template <class... Properties>
+ sycl::queue create_queue_impl(sycl::device device,
bool enable_exception_handler,
Properties... properties) {
- sycl::async_handler eh = {};
- if (enable_exception_handler) {
- eh = exception_handler;
- }
- _queues.push_back(std::make_shared<sycl::queue>(
- context, device, eh,
- sycl::property_list(
- #ifdef DPCT_PROFILING_ENABLED
- sycl::property::queue::enable_profiling(),
- #endif
- properties...)));
-
- return _queues.back().get();
+ sycl::async_handler eh = {};
+ if (enable_exception_handler) {
+ eh = exception_handler;
}
-
- void get_version(int &major, int &minor) const
- {
- detail::get_version(*this, major, minor);
- }
- sycl::queue *_q_in_order, *_q_out_of_order;
- sycl::queue *_saved_queue;
- sycl::context _ctx;
- std::vector<std::shared_ptr<sycl::queue>> _queues;
- mutable mutex_type m_mutex;
+ _queues.push_back(
+ sycl::queue(device, eh,
+ sycl::property_list(
+#ifdef DPCT_PROFILING_ENABLED
+ sycl::property::queue::enable_profiling(),
+#endif
+ properties...)));
+
+ return _queues.back();
+ }
+
+ void get_version(int &major, int &minor) const {
+ detail::get_version(*this, major, minor);
+ }
+ sycl::queue _q_in_order, _q_out_of_order;
+ sycl::queue _saved_queue;
+ std::vector<sycl::queue> _queues;
+ mutable mutex_type m_mutex;
};
+
/// device manager
class dev_mgr
{
diff --git a/ggml.h b/ggml.h
index 13502a36..2e8fd0db 100644
--- a/ggml.h
+++ b/ggml.h
@@ -312,6 +312,12 @@
GGML_TENSOR_LOCALS(int64_t, ne, dst, ne) \
GGML_TENSOR_LOCALS(size_t, nb, dst, nb)
+#define GGML_TENSOR_BINARY_OP_LOCALS01 \
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb) \
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne) \
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb)
+
#ifdef __cplusplus
extern "C" {
#endif