From ee1628bdfea8b0079fed0140ac2f00ef1b465b57 Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Wed, 7 Feb 2024 07:54:50 +0100 Subject: Basic Vulkan Multi-GPU implementation (#5321) * Initial Vulkan multi-gpu implementation Move most global variables into backend context * Add names to backend device functions * Add further missing cleanup code * Reduce code duplication in tensor split layer assignment * generalize LLAMA_SPLIT_LAYER for all backends, do not expose device count and memory in llama.h * Only do device info print in the beginning and initialize one backend for cpu assist Add missing cleanup code * Rework backend memory management to make sure devices and buffers get properly allocated and freed * Rename cpu assist free function --------- Co-authored-by: slaren --- common/common.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'common/common.cpp') diff --git a/common/common.cpp b/common/common.cpp index 8c1a6058..e0082a82 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -46,6 +46,10 @@ #define GGML_USE_CUBLAS_SYCL #endif +#if (defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)) || defined(GGML_USE_VULKAN) +#define GGML_USE_CUBLAS_SYCL_VULKAN +#endif + int32_t get_num_physical_cores() { #ifdef __linux__ // enumerate the set of thread siblings, num entries is num cores @@ -660,8 +664,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { params.tensor_split[i] = 0.0f; } } -#ifndef GGML_USE_CUBLAS_SYCL - fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL. Setting a tensor split has no effect.\n"); +#ifndef GGML_USE_CUBLAS_SYCL_VULKAN + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL/Vulkan. Setting a tensor split has no effect.\n"); #endif // GGML_USE_CUBLAS_SYCL } else if (arg == "--no-mmap") { params.use_mmap = false; -- cgit v1.2.3