summary | refs | log | tree | commit | diff
path: root/llama.cpp
diff options
context:
space:
mode:
author: 0cc4m <picard12@live.de> 2024-03-29 17:29:21 +0100
committer: GitHub <noreply@github.com> 2024-03-29 17:29:21 +0100
commit: ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c (patch)
tree: 041a10dd587c26c42171be18e0f587f1fca2feca /llama.cpp
parent: d48ccf3ad4fea5b9ede209c7f40be65371987bfe (diff)
Vulkan k-quant mmq and ggml-backend offload functionality (#6155)
* Fix Vulkan no kv offload incoherence
* Add k-quant mul mat mat shaders
* Rework working buffer allocation, reduces vram use noticeably
  Clean up cpu assist code, replaced with ggml-backend offload function
* Default to all dedicated GPUs
* Add fallback for integrated GPUs if no dedicated GPUs are found
* Add debug info which device is allocating memory
* Fix Intel dequant issue
  Fix validation issue
* Fix Vulkan GGML_OP_GET_ROWS implementation
* Clean up merge artifacts
* Remove Vulkan warning
Diffstat (limited to 'llama.cpp')
-rw-r--r-- llama.cpp 19
1 files changed, 14 insertions, 5 deletions
diff --git a/llama.cpp b/llama.cpp
index 97408ba1..21e7a067 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2121,10 +2121,6 @@ struct llama_context {
ggml_backend_free(backend);
}
-#ifdef GGML_USE_VULKAN
- ggml_vk_free_cpu_assist();
-#endif
-
ggml_backend_buffer_free(buf_output);
}
@@ -14131,7 +14127,20 @@ struct llama_context * llama_new_context_with_model(
}
}
#elif defined(GGML_USE_VULKAN)
- if (model->n_gpu_layers > 0) {
+ if (model->split_mode == LLAMA_SPLIT_MODE_ROW) {
+ LLAMA_LOG_ERROR("%s: Row split not supported. Failed to initialize Vulkan backend\n", __func__);
+ llama_free(ctx);
+ return nullptr;
+ }
+ if (model->split_mode == LLAMA_SPLIT_MODE_NONE) {
+ ggml_backend_t backend = ggml_backend_vk_init(0);
+ if (backend == nullptr) {
+ LLAMA_LOG_ERROR("%s: failed to initialize Vulkan backend\n", __func__);
+ llama_free(ctx);
+ return nullptr;
+ }
+ ctx->backends.push_back(backend);
+ } else {
for (int device = 0; device < ggml_backend_vk_get_device_count(); ++device) {
ggml_backend_t backend = ggml_backend_vk_init(device);
if (backend == nullptr) {