Diffstat (limited to 'llama.cpp')
 llama.cpp | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index e9192b4f..b27aa272 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3750,6 +3750,14 @@ static bool llm_load_tensors(
     model.main_gpu     = main_gpu;
     model.n_gpu_layers = n_gpu_layers;
 
+#ifdef GGML_USE_SYCL
+    if (split_mode == LLAMA_SPLIT_MODE_NONE) {
+        ggml_backend_sycl_set_single_device(main_gpu);
+        // SYCL uses a device index (0, 1, 2, ...) instead of a device id.
+        main_gpu = ggml_backend_sycl_get_device_index(main_gpu);
+    }
+#endif
+
     const int64_t n_layer     = hparams.n_layer;
     const int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0);
 
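Note: this hunk pins ggml-sycl to a single device when no split mode is requested, then maps the user-facing device id to the contiguous index (0, 1, 2, ...) by which the SYCL backend enumerates devices. Below is a minimal sketch of that mapping, using only the two calls introduced above; the helper name resolve_sycl_main_gpu is hypothetical, not part of llama.cpp.

    #ifdef GGML_USE_SYCL
    // Hypothetical helper: pin ggml-sycl to one device, then return the
    // contiguous SYCL index that should be used as main_gpu from here on.
    static int resolve_sycl_main_gpu(int main_gpu_id) {
        ggml_backend_sycl_set_single_device(main_gpu_id);       // restrict to the chosen device
        return ggml_backend_sycl_get_device_index(main_gpu_id); // device id -> index 0, 1, 2, ...
    }
    #endif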
@@ -12260,13 +12268,13 @@ struct llama_context * llama_new_context_with_model(
             ctx->backends.push_back(backend);
         } else {
             // LLAMA_SPLIT_LAYER requires a backend for each GPU
-            int id_list[GGML_SYCL_MAX_DEVICES];
-            ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
+
             for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
-                int device_id = id_list[i];
                 ggml_backend_t backend = ggml_backend_sycl_init(i);
                 if (backend == nullptr) {
-                    LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d)backend\n", __func__, device_id, i);
+                    int id_list[GGML_SYCL_MAX_DEVICES];
+                    ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
+                    LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d) backend\n", __func__, id_list[i], i);
                     llama_free(ctx);
                     return nullptr;
                 }
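Note: this hunk initializes one backend per SYCL device index and defers the device-id lookup to the error path, where it is only needed to label the failing device. Below is a self-contained sketch of the resulting pattern, assuming the ggml-sycl API shown in the diff; the function init_sycl_backends and the plain fprintf logging are illustrative, not llama.cpp's own.

    #include <cstdio>
    #include <vector>

    #include "ggml-backend.h"
    #include "ggml-sycl.h"

    // Initialize one ggml backend per SYCL device, addressing devices by
    // contiguous index; fetch the platform device-id list only on failure,
    // purely to make the error message more descriptive.
    static bool init_sycl_backends(std::vector<ggml_backend_t> & backends) {
        for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
            ggml_backend_t backend = ggml_backend_sycl_init(i); // takes an index, not a device id
            if (backend == nullptr) {
                int id_list[GGML_SYCL_MAX_DEVICES];
                ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
                fprintf(stderr, "failed to initialize SYCL%d (index %d) backend\n", id_list[i], i);
                return false;
            }
            backends.push_back(backend);
        }
        return true;
    }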