From 3d7ebf63123b8652fb7bbecef7ba731202309901 Mon Sep 17 00:00:00 2001
From: 0cc4m <picard12@live.de>
Date: Mon, 3 Jun 2024 10:59:14 +0200
Subject: Vulkan Mixture of Experts (MoE) support (#7628)

* Finish Vulkan mul_mat_id implementation

* Add Vulkan sum_rows and div ops

* Fix MUL_MAT_ID matrix-matrix shader

* Fix MUL_MAT_ID matrix-vector shader dispatch size

* Fix MUL_MAT_ID matrix-vector shader and dispatch code

* Update Vulkan CPU offload for MUL_MAT_ID

* Fix crash when using split mode none and setting a main GPU
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index 650e8bba..b19c6ff3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -16372,7 +16372,7 @@ struct llama_context * llama_new_context_with_model(
             return nullptr;
         }
         if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
-            ggml_backend_t backend = ggml_backend_vk_init(0);
+            ggml_backend_t backend = ggml_backend_vk_init(model->main_gpu);
             if (backend == nullptr) {
                 LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__);
                 llama_free(ctx);
-- 
cgit v1.2.3