1 file changed, 4 insertions, 12 deletions
diff --git a/llama.cpp b/llama.cpp
index 47809964..4225f955 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3750,14 +3750,6 @@ static bool llm_load_tensors(
     model.main_gpu     = main_gpu;
     model.n_gpu_layers = n_gpu_layers;
 
-#ifdef GGML_USE_SYCL
-    if (split_mode == LLAMA_SPLIT_MODE_NONE) {
-        ggml_backend_sycl_set_single_device(main_gpu);
-        //SYCL use device index (0, 1, 2), instead if device id.
-        main_gpu = ggml_backend_sycl_get_device_index(main_gpu);
-    }
-#endif
-
     const int64_t n_layer     = hparams.n_layer;
     const int64_t i_gpu_start = std::max((int64_t) hparams.n_layer - n_gpu_layers, (int64_t) 0);
 
@@ -12268,13 +12260,13 @@ struct llama_context * llama_new_context_with_model(
                 ctx->backends.push_back(backend);
             } else {
                 // LLAMA_SPLIT_LAYER requires a backend for each GPU
-
+                int id_list[GGML_SYCL_MAX_DEVICES];
+                ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
                 for (int i = 0; i < ggml_backend_sycl_get_device_count(); ++i) {
+                    int device_id = id_list[i];
                     ggml_backend_t backend = ggml_backend_sycl_init(i);
                     if (backend == nullptr) {
-                        int id_list[GGML_SYCL_MAX_DEVICES];
-                        ggml_sycl_get_gpu_list(id_list, GGML_SYCL_MAX_DEVICES);
-                        LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d)backend\n", __func__, id_list[i], i);
+                        LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d (index %d)backend\n", __func__, device_id, i);
                         llama_free(ctx);
                         return nullptr;
                     }