summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Meng, Hengyu <hengyu.meng@intel.com> 2024-03-24 12:04:25 +0800
committer GitHub <noreply@github.com> 2024-03-24 12:04:25 +0800
commit ddf65685105a39a57b1e7f80c3aa502a6313af24 (patch)
tree 77c67e2a2ab5eed36276d39ac29e095527919192
parent d03224ac9840351023ff8abcf4aa0542258a53df (diff)
[SYCL] offload op (#6217)
* remove non-USM methods * leave the scheduling to ggml_backend_sched entirely
-rw-r--r-- ggml-sycl.cpp 293
-rw-r--r-- ggml-sycl.h 16
-rw-r--r-- ggml.c 10
-rw-r--r-- llama.cpp 36
4 files changed, 51 insertions, 304 deletions
diff --git a/ggml-sycl.cpp b/ggml-sycl.cpp
index cc9ee076..fc4d2964 100644
--- a/ggml-sycl.cpp
+++ b/ggml-sycl.cpp
@@ -740,11 +740,7 @@ namespace dpct
sycl::queue &default_queue()
{
-#ifdef DPCT_USM_LEVEL_NONE
- return out_of_order_queue();
-#else
return in_order_queue();
-#endif // DPCT_USM_LEVEL_NONE
}
void queues_wait_and_throw()
@@ -763,11 +759,7 @@ namespace dpct
sycl::queue *create_queue(bool enable_exception_handler = false)
{
-#ifdef DPCT_USM_LEVEL_NONE
- return create_out_of_order_queue(enable_exception_handler);
-#else
return create_in_order_queue(enable_exception_handler);
-#endif // DPCT_USM_LEVEL_NONE
}
sycl::queue *create_queue(sycl::context context, sycl::device device,
@@ -1075,11 +1067,6 @@ namespace dpct
static pointer_access_attribute get_pointer_attribute(sycl::queue &q,
const void *ptr)
{
-#ifdef DPCT_USM_LEVEL_NONE
- return mem_mgr::instance().is_device_ptr(ptr)
- ? pointer_access_attribute::device_only
- : pointer_access_attribute::host_only;
-#else
switch (sycl::get_pointer_type(ptr, q.get_context()))
{
case sycl::usm::alloc::unknown:
@@ -1090,7 +1077,6 @@ namespace dpct
case sycl::usm::alloc::host:
return pointer_access_attribute::host_device;
}
-#endif
}
template <typename ArgT>
@@ -1273,11 +1259,7 @@ namespace dpct
static inline void *dpct_malloc(size_t size, sycl::queue &q)
{
-#ifdef DPCT_USM_LEVEL_NONE
- return mem_mgr::instance().mem_alloc(size * sizeof(byte_t));
-#else
return sycl::malloc_device(size, q.get_device(), q.get_context());
-#endif // DPCT_USM_LEVEL_NONE
}
#define PITCH_DEFAULT_ALIGN(x) (((x) + 31) & ~(0x1F))
@@ -1301,25 +1283,7 @@ namespace dpct
static inline sycl::event dpct_memset(sycl::queue &q, void *dev_ptr,
valueT value, size_t size)
{
-#ifdef DPCT_USM_LEVEL_NONE
- auto &mm = mem_mgr::instance();
- assert(mm.is_device_ptr(dev_ptr));
- auto alloc = mm.translate_ptr(dev_ptr);
- size_t offset = (valueT *)dev_ptr - (valueT *)alloc.alloc_ptr;
-
- return q.submit([&](sycl::handler &cgh)
- {
- auto r = sycl::range<1>(size);
- auto o = sycl::id<1>(offset);
- auto new_buffer = alloc.buffer.reinterpret<valueT>(
- sycl::range<1>(alloc.size / sizeof(valueT)));
- sycl::accessor<valueT, 1, sycl::access_mode::write,
- sycl::access::target::device>
- acc(new_buffer, cgh, r, o);
- cgh.fill(acc, value); });
-#else
return q.fill(dev_ptr, value, size);
-#endif // DPCT_USM_LEVEL_NONE
}
/**
@@ -1413,72 +1377,8 @@ namespace dpct
{
if (!size)
return sycl::event{};
-#ifdef DPCT_USM_LEVEL_NONE
- auto &mm = mem_mgr::instance();
- auto real_direction = deduce_memcpy_direction(q, to_ptr, from_ptr, direction);
-
- switch (real_direction)
- {
- case host_to_host:
- return q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- cgh.host_task([=] { std::memcpy(to_ptr, from_ptr, size); }); });
- case host_to_device:
- {
- auto alloc = mm.translate_ptr(to_ptr);
- size_t offset = (byte_t *)to_ptr - alloc.alloc_ptr;
- return q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- auto r = sycl::range<1>(size);
- auto o = sycl::id<1>(offset);
- sycl::accessor<byte_t, 1, sycl::access_mode::write,
- sycl::access::target::device>
- acc(alloc.buffer, cgh, r, o);
- cgh.copy(from_ptr, acc); });
- }
- case device_to_host:
- {
- auto alloc = mm.translate_ptr(from_ptr);
- size_t offset = (byte_t *)from_ptr - alloc.alloc_ptr;
- return q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- auto r = sycl::range<1>(size);
- auto o = sycl::id<1>(offset);
- sycl::accessor<byte_t, 1, sycl::access_mode::read,
- sycl::access::target::device>
- acc(alloc.buffer, cgh, r, o);
- cgh.copy(acc, to_ptr); });
- }
- case device_to_device:
- {
- auto to_alloc = mm.translate_ptr(to_ptr);
- auto from_alloc = mm.translate_ptr(from_ptr);
- size_t to_offset = (byte_t *)to_ptr - to_alloc.alloc_ptr;
- size_t from_offset = (byte_t *)from_ptr - from_alloc.alloc_ptr;
- return q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- auto r = sycl::range<1>(size);
- auto to_o = sycl::id<1>(to_offset);
- auto from_o = sycl::id<1>(from_offset);
- sycl::accessor<byte_t, 1, sycl::access_mode::write,
- sycl::access::target::device>
- to_acc(to_alloc.buffer, cgh, r, to_o);
- sycl::accessor<byte_t, 1, sycl::access_mode::read,
- sycl::access::target::device>
- from_acc(from_alloc.buffer, cgh, r, from_o);
- cgh.copy(from_acc, to_acc); });
- }
- default:
- throw std::runtime_error("dpct_memcpy: invalid direction value");
- }
-#else
return q.memcpy(to_ptr, from_ptr, size, dep_events);
GGML_UNUSED(direction);
-#endif // DPCT_USM_LEVEL_NONE
}
// Get actual copy range and make sure it will not exceed range.
@@ -1618,45 +1518,15 @@ namespace dpct
break;
}
case device_to_device:
-#ifdef DPCT_USM_LEVEL_NONE
- {
- auto &mm = mem_mgr::instance();
- auto to_alloc = mm.translate_ptr(to_surface);
- auto from_alloc = mm.translate_ptr(from_surface);
- size_t to_offset = (byte_t *)to_surface - to_alloc.alloc_ptr;
- size_t from_offset = (byte_t *)from_surface - from_alloc.alloc_ptr;
- event_list.push_back(q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- auto to_o = sycl::id<1>(to_offset);
- auto from_o = sycl::id<1>(from_offset);
- sycl::accessor<byte_t, 1, sycl::access_mode::write,
- sycl::access::target::device>
- to_acc(to_alloc.buffer, cgh,
- get_copy_range(size, to_slice, to_range.get(0)), to_o);
- sycl::accessor<byte_t, 1, sycl::access_mode::read,
- sycl::access::target::device>
- from_acc(from_alloc.buffer, cgh,
- get_copy_range(size, from_slice, from_range.get(0)), from_o);
- cgh.parallel_for<class dpct_memcpy_3d_detail_usmnone>(
- size,
- [=](sycl::id<3> id) {
- to_acc[get_offset(id, to_slice, to_range.get(0))] =
- from_acc[get_offset(id, from_slice, from_range.get(0))];
- }); }));
- }
-#else
- event_list.push_back(q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- cgh.parallel_for<class dpct_memcpy_3d_detail>(
- size,
- [=](sycl::id<3> id) {
- to_surface[get_offset(id, to_slice, to_range.get(0))] =
- from_surface[get_offset(id, from_slice, from_range.get(0))];
- }); }));
-#endif
- break;
+ event_list.push_back(q.submit([&](sycl::handler &cgh){
+ cgh.depends_on(dep_events);
+ cgh.parallel_for<class dpct_memcpy_3d_detail>(
+ size,
+ [=](sycl::id<3> id) {
+ to_surface[get_offset(id, to_slice, to_range.get(0))] =
+ from_surface[get_offset(id, from_slice, from_range.get(0))];
+ }); }));
+ break;
default:
throw std::runtime_error("dpct_memcpy: invalid direction value");
}
@@ -1754,11 +1624,7 @@ namespace dpct
{
if (ptr)
{
-#ifdef DPCT_USM_LEVEL_NONE
- detail::mem_mgr::instance().mem_free(ptr);
-#else
sycl::free(ptr, q.get_context());
-#endif // DPCT_USM_LEVEL_NONE
}
}
@@ -1766,11 +1632,7 @@ namespace dpct
inline auto get_memory(const void *x)
{
T *new_x = reinterpret_cast<T *>(const_cast<void *>(x));
-#ifdef DPCT_USM_LEVEL_NONE
- return dpct::get_buffer<std::remove_cv_t<T>>(new_x);
-#else
return new_x;
-#endif
}
template <typename T>
@@ -2222,72 +2084,8 @@ namespace dpct
{
if (!size)
return sycl::event{};
-#ifdef DPCT_USM_LEVEL_NONE
- auto &mm = mem_mgr::instance();
- auto real_direction = deduce_memcpy_direction(q, to_ptr, from_ptr, direction);
-
- switch (real_direction)
- {
- case host_to_host:
- return q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- cgh.host_task([=] { std::memcpy(to_ptr, from_ptr, size); }); });
- case host_to_device:
- {
- auto alloc = mm.translate_ptr(to_ptr);
- size_t offset = (byte_t *)to_ptr - alloc.alloc_ptr;
- return q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- auto r = sycl::range<1>(size);
- auto o = sycl::id<1>(offset);
- sycl::accessor<byte_t, 1, sycl::access_mode::write,
- sycl::access::target::device>
- acc(alloc.buffer, cgh, r, o);
- cgh.copy(from_ptr, acc); });
- }
- case device_to_host:
- {
- auto alloc = mm.translate_ptr(from_ptr);
- size_t offset = (byte_t *)from_ptr - alloc.alloc_ptr;
- return q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- auto r = sycl::range<1>(size);
- auto o = sycl::id<1>(offset);
- sycl::accessor<byte_t, 1, sycl::access_mode::read,
- sycl::access::target::device>
- acc(alloc.buffer, cgh, r, o);
- cgh.copy(acc, to_ptr); });
- }
- case device_to_device:
- {
- auto to_alloc = mm.translate_ptr(to_ptr);
- auto from_alloc = mm.translate_ptr(from_ptr);
- size_t to_offset = (byte_t *)to_ptr - to_alloc.alloc_ptr;
- size_t from_offset = (byte_t *)from_ptr - from_alloc.alloc_ptr;
- return q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- auto r = sycl::range<1>(size);
- auto to_o = sycl::id<1>(to_offset);
- auto from_o = sycl::id<1>(from_offset);
- sycl::accessor<byte_t, 1, sycl::access_mode::write,
- sycl::access::target::device>
- to_acc(to_alloc.buffer, cgh, r, to_o);
- sycl::accessor<byte_t, 1, sycl::access_mode::read,
- sycl::access::target::device>
- from_acc(from_alloc.buffer, cgh, r, from_o);
- cgh.copy(from_acc, to_acc); });
- }
- default:
- throw std::runtime_error("dpct_memcpy: invalid direction value");
- }
-#else
return q.memcpy(to_ptr, from_ptr, size, dep_events);
GGML_UNUSED(direction);
-#endif // DPCT_USM_LEVEL_NONE
}
// Get actual copy range and make sure it will not exceed range.
@@ -2427,34 +2225,6 @@ namespace dpct
break;
}
case device_to_device:
-#ifdef DPCT_USM_LEVEL_NONE
- {
- auto &mm = mem_mgr::instance();
- auto to_alloc = mm.translate_ptr(to_surface);
- auto from_alloc = mm.translate_ptr(from_surface);
- size_t to_offset = (byte_t *)to_surface - to_alloc.alloc_ptr;
- size_t from_offset = (byte_t *)from_surface - from_alloc.alloc_ptr;
- event_list.push_back(q.submit([&](sycl::handler &cgh)
- {
- cgh.depends_on(dep_events);
- auto to_o = sycl::id<1>(to_offset);
- auto from_o = sycl::id<1>(from_offset);
- sycl::accessor<byte_t, 1, sycl::access_mode::write,
- sycl::access::target::device>
- to_acc(to_alloc.buffer, cgh,
- get_copy_range(size, to_slice, to_range.get(0)), to_o);
- sycl::accessor<byte_t, 1, sycl::access_mode::read,
- sycl::access::target::device>
- from_acc(from_alloc.buffer, cgh,
- get_copy_range(size, from_slice, from_range.get(0)), from_o);
- cgh.parallel_for<class dpct_memcpy_3d_detail_usmnone>(
- size,
- [=](sycl::id<3> id) {
- to_acc[get_offset(id, to_slice, to_range.get(0))] =
- from_acc[get_offset(id, from_slice, from_range.get(0))];
- }); }));
- }
-#else
event_list.push_back(q.submit([&](sycl::handler &cgh)
{
cgh.depends_on(dep_events);
@@ -2464,7 +2234,6 @@ namespace dpct
to_surface[get_offset(id, to_slice, to_range.get(0))] =
from_surface[get_offset(id, from_slice, from_range.get(0))];
}); }));
-#endif
break;
default:
throw std::runtime_error("dpct_memcpy: invalid direction value");
@@ -2655,9 +2424,6 @@ namespace dpct
void *c[], library_data_t c_type, int ldc,
int batch_size, library_data_t scaling_type)
{
-#ifdef DPCT_USM_LEVEL_NONE
- throw std::runtime_error("this API is unsupported when USM level is none");
-#else
if (scaling_type == library_data_t::real_float &&
c_type == library_data_t::complex_float)
{
@@ -2792,7 +2558,6 @@ namespace dpct
default:
throw std::runtime_error("the combination of data type is unsupported");
}
-#endif
}
/// Computes a batch of matrix-matrix product with general matrices.
@@ -3131,24 +2896,9 @@ namespace dpct
template <size_t D = Dimension>
typename std::enable_if<D == 1, T>::type &operator[](size_t index) {
init();
- #ifdef DPCT_USM_LEVEL_NONE
- return dpct::get_buffer<typename std::enable_if<D == 1, T>::type>(
- _device_ptr)
- .template get_access<sycl::access_mode::read_write>()[index];
- #else
return _device_ptr[index];
- #endif // DPCT_USM_LEVEL_NONE
}
- #ifdef DPCT_USM_LEVEL_NONE
- /// Get sycl::accessor for the device memory object when usm is not used.
- accessor_t get_access(sycl::handler &cgh) {
- return get_buffer(_device_ptr)
- .template reinterpret<T, Dimension>(_range)
- .template get_access<detail::memory_traits<Memory, T>::mode,
- detail::memory_traits<Memory, T>::target>(cgh);
- }
- #else
/// Get dpct::accessor with dimension info for the device memory object
/// when usm is used and dimension is greater than 1.
template <size_t D = Dimension>
@@ -3156,7 +2906,6 @@ namespace dpct
get_access(sycl::handler &cgh) {
return dpct_accessor_t((T *)_device_ptr, _range);
}
- #endif // DPCT_USM_LEVEL_NONE
private:
device_memory(value_t *memory_ptr, size_t size)
@@ -3201,15 +2950,6 @@ namespace dpct
/// Default constructor
device_memory() : base(1) {}
-
- #ifdef DPCT_USM_LEVEL_NONE
- /// Get sycl::accessor for the device memory object when usm is not used.
- accessor_t get_access(sycl::handler &cgh) {
- auto buf = get_buffer(base::get_ptr())
- .template reinterpret<T, 1>(sycl::range<1>(1));
- return accessor_t(buf, cgh);
- }
- #endif // DPCT_USM_LEVEL_NONE
};
} // namespace detail
@@ -13181,7 +12921,7 @@ int get_work_group_size(int user_device_id) {
return prop.get_max_work_group_size();
}
-void ggml_init_sycl() try {
+static void ggml_init_sycl() try {
static bool initialized = false;
if (!initialized) {
@@ -16677,6 +16417,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
};
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
+ ggml_init_sycl();
if (device_index>=g_device_count or device_index<0) {
printf("ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
device_index, g_device_count-1);
@@ -17046,6 +16787,7 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_split_buffer_type_interface
};
GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split) {
+ ggml_init_sycl();
// FIXME: this is not thread safe
static std::map<std::array<float, GGML_SYCL_MAX_DEVICES>, struct ggml_backend_buffer_type> buft_map;
@@ -17379,6 +17121,13 @@ GGML_CALL static bool ggml_backend_sycl_supports_op(ggml_backend_t backend, cons
UNUSED(backend);
}
+GGML_CALL static bool ggml_backend_sycl_offload_op(ggml_backend_t backend, const ggml_tensor * op) {
+ const int min_batch_size = 32;
+ return op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS;
+ GGML_UNUSED(backend);
+}
+
+
static ggml_backend_i ggml_backend_sycl_interface = {
/* .get_name = */ ggml_backend_sycl_name,
/* .free = */ ggml_backend_sycl_free,
@@ -17392,7 +17141,7 @@ static ggml_backend_i ggml_backend_sycl_interface = {
/* .graph_plan_compute = */ NULL,
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
/* .supports_op = */ ggml_backend_sycl_supports_op,
- /* .offload_op = */ NULL,
+ /* .offload_op = */ ggml_backend_sycl_offload_op,
/* .event_new = */ NULL,
/* .event_free = */ NULL,
/* .event_record = */ NULL,
@@ -17406,7 +17155,7 @@ static ggml_guid_t ggml_backend_sycl_guid() {
}
GGML_CALL ggml_backend_t ggml_backend_sycl_init(int device) {
- ggml_init_sycl(); // TODO: remove from ggml.c
+ ggml_init_sycl();
check_allow_gpu_index(device);
diff --git a/ggml-sycl.h b/ggml-sycl.h
index 1c9d5211..a9f776fc 100644
--- a/ggml-sycl.h
+++ b/ggml-sycl.h
@@ -16,16 +16,22 @@ extern "C" {
#define GGML_SYCL_MAX_DEVICES 48
#define GGML_SYCL_NAME "SYCL"
-GGML_API void ggml_init_sycl(void);
-GGML_API bool ggml_sycl_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
+// backend API
GGML_API ggml_backend_t ggml_backend_sycl_init(int device);
+
+// device buffer
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device);
+
+// split tensor buffer that splits matrices by rows across multiple devices
+GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
+
+// pinned host buffer for use with the CPU backend for faster copies between CPU and GPU
GGML_API ggml_backend_buffer_type_t ggml_backend_sycl_host_buffer_type(void);
+
GGML_API void ggml_backend_sycl_print_sycl_devices(void);
GGML_API GGML_CALL void ggml_sycl_get_gpu_list(int *id_list, int max_len);
GGML_API GGML_CALL void ggml_sycl_get_device_description(int device, char *description, size_t description_size);
GGML_API GGML_CALL int ggml_backend_sycl_get_device_count();
-GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_type(const float * tensor_split);
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
@@ -34,6 +40,10 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
+
+// SYCL doesn't support registering host memory, keep here for reference
+// GGML_API GGML_CALL bool ggml_backend_sycl_register_host_buffer(void * buffer, size_t size);
+// GGML_API GGML_CALL void ggml_backend_sycl_unregister_host_buffer(void * buffer);
#ifdef __cplusplus
}
#endif
diff --git a/ggml.c b/ggml.c
index 54365b7a..18f10a3d 100644
--- a/ggml.c
+++ b/ggml.c
@@ -291,8 +291,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
#include "ggml-opencl.h"
#elif defined(GGML_USE_VULKAN)
#include "ggml-vulkan.h"
-#elif defined(GGML_USE_SYCL)
-#include "ggml-sycl.h"
#endif
// floating point type used to accumulate sums
@@ -2698,8 +2696,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
ggml_cl_init();
#elif defined(GGML_USE_VULKAN)
ggml_vk_init_cpu_assist();
-#elif defined(GGML_USE_SYCL)
- ggml_init_sycl();
#endif
ggml_setup_op_has_task_pass();
@@ -16115,12 +16111,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
#endif // GGML_USE_VULKAN
-#ifdef GGML_USE_SYCL
- bool skip_cpu = ggml_sycl_compute_forward(params, tensor);
- if (skip_cpu) {
- return;
- }
-#endif // GGML_USE_SYCL
switch (tensor->op) {
case GGML_OP_DUP:
{
diff --git a/llama.cpp b/llama.cpp
index 9614cdb1..61587cb7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -13632,30 +13632,28 @@ struct llama_context * llama_new_context_with_model(
}
}
#elif defined(GGML_USE_SYCL)
- if (model->n_gpu_layers > 0) {
- // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
- if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
- ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
+ // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used
+ if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) {
+ ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu);
+ if (backend == nullptr) {
+ int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
+ LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
+ llama_free(ctx);
+ return nullptr;
+ }
+ ctx->backends.push_back(backend);
+ } else {
+ // LLAMA_SPLIT_LAYER requires a backend for each GPU
+ for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
+ ggml_backend_t backend = ggml_backend_sycl_init(i);
if (backend == nullptr) {
- int main_gpu_id = ggml_backend_sycl_get_device_id(model->main_gpu);
- LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, main_gpu_id, model->main_gpu);
+ int id_list[GGML_SYCL_MAX_DEVICES];
+ ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
+ LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i);
llama_free(ctx);
return nullptr;
}
ctx->backends.push_back(backend);
- } else {
- // LLAMA_SPLIT_LAYER requires a backend for each GPU
- for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
- ggml_backend_t backend = ggml_backend_sycl_init(i);
- if (backend == nullptr) {
- int id_list[GGML_SYCL_MAX_DEVICES];
- ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
- LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i);
- llama_free(ctx);
- return nullptr;
- }
- ctx->backends.push_back(backend);
- }
}
}
#elif defined(GGML_USE_KOMPUTE)