From 3d7ebf63123b8652fb7bbecef7ba731202309901 Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Mon, 3 Jun 2024 10:59:14 +0200
Subject: Vulkan Mixture of Experts (MoE) support (#7628)

* Finish Vulkan mul_mat_id implementation

* Add Vulkan sum_rows and div ops

* Fix MUL_MAT_ID matrix matrix shader

* Fix MUL_MAT_ID matrix vector shader dispatch size

* Fix MUL_MAT_ID matrix vector shader and dispatch code

* Update Vulkan CPU offload for MUL_MAT_ID

* Fix crash when using split mode none and setting a main GPU
---
 llama.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'llama.cpp')

diff --git a/llama.cpp b/llama.cpp
index 650e8bba..b19c6ff3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -16372,7 +16372,7 @@ struct llama_context * llama_new_context_with_model(
             return nullptr;
         }
         if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
-            ggml_backend_t backend = ggml_backend_vk_init(0);
+            ggml_backend_t backend = ggml_backend_vk_init(model->main_gpu);
             if (backend == nullptr) {
                 LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__);
                 llama_free(ctx);
-- 
cgit v1.2.3