Basic Vulkan Multi-GPU implementation (#5321)

* Initial Vulkan multi-gpu implementation

  Move most global variables into backend context

* Add names to backend device functions

* Add further missing cleanup code

* Reduce code duplication in tensor split layer assignment

* generalize LLAMA_SPLIT_LAYER for all backends, do not expose device count and memory in llama.h

* Only do device info print in the beginning and initialize one backend for cpu assist

  Add missing cleanup code

* Rework backend memory management to make sure devices and buffers get properly allocated and freed

* Rename cpu assist free function

---------

Co-authored-by: slaren <slarengh@gmail.com>
author: 0cc4m <picard12@live.de> 2024-02-07 07:54:50 +0100
committer: GitHub <noreply@github.com> 2024-02-07 07:54:50 +0100
commit: ee1628bdfea8b0079fed0140ac2f00ef1b465b57 (patch)
tree: 42ee597afa79a6c4e0bb772d78a7cfcd54777696 /common/common.cpp
parent: ed0bf32290ee5b30ffad5becd99cbecef74aedd7 (diff)
1 files changed, 6 insertions, 2 deletions
diff --git a/common/common.cpp b/common/common.cpp
index 8c1a6058..e0082a82 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -46,6 +46,10 @@
 #define GGML_USE_CUBLAS_SYCL
 #endif
 
+#if (defined(GGML_USE_CUBLAS) || defined(GGML_USE_SYCL)) || defined(GGML_USE_VULKAN)
+#define GGML_USE_CUBLAS_SYCL_VULKAN
+#endif
+
 int32_t get_num_physical_cores() {
 #ifdef __linux__
     // enumerate the set of thread siblings, num entries is num cores
@@ -660,8 +664,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
                     params.tensor_split[i] = 0.0f;
                 }
             }
-#ifndef GGML_USE_CUBLAS_SYCL
-            fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL. Setting a tensor split has no effect.\n");
+#ifndef GGML_USE_CUBLAS_SYCL_VULKAN
+            fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS/SYCL/Vulkan. Setting a tensor split has no effect.\n");
 #endif // GGML_USE_CUBLAS_SYCL
         } else if (arg == "--no-mmap") {
             params.use_mmap = false;
author	0cc4m <picard12@live.de>	2024-02-07 07:54:50 +0100
committer	GitHub <noreply@github.com>	2024-02-07 07:54:50 +0100
commit	ee1628bdfea8b0079fed0140ac2f00ef1b465b57 (patch)
tree	42ee597afa79a6c4e0bb772d78a7cfcd54777696 /common/common.cpp
parent	ed0bf32290ee5b30ffad5becd99cbecef74aedd7 (diff)