author | 0cc4m <picard12@live.de> | 2024-03-29 17:29:21 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-29 17:29:21 +0100 |
commit | ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c (patch) | |
tree | 041a10dd587c26c42171be18e0f587f1fca2feca /ggml.c | |
parent | d48ccf3ad4fea5b9ede209c7f40be65371987bfe (diff) | |
Vulkan k-quant mmq and ggml-backend offload functionality (#6155)
* Fix Vulkan incoherence when the KV cache is not offloaded
* Add k-quant mul mat mat shaders
* Rework working buffer allocation, reducing VRAM use noticeably
Clean up the CPU-assist code, replacing it with the ggml-backend offload functionality
* Default to all dedicated GPUs
* Add fallback to integrated GPUs if no dedicated GPUs are found (see the device-selection sketch after this list)
* Add debug info which device is allocating memory
* Fix Intel dequant issue
Fix validation issue
* Fix Vulkan GGML_OP_GET_ROWS implementation
* Clean up merge artifacts
* Remove Vulkan warning
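
The device-selection policy described in the bullets above (use all dedicated GPUs by default, and fall back to an integrated GPU only when no dedicated one is found) can be illustrated with the plain Vulkan C API. This is a minimal sketch of the policy only, not the code from ggml-vulkan; the helper name `pick_devices` and its parameters are hypothetical.

```c
// Sketch (assumption, not the ggml-vulkan implementation): prefer discrete GPUs,
// fall back to an integrated GPU only when no discrete device is present.
#include <vulkan/vulkan.h>

static uint32_t pick_devices(VkInstance instance, VkPhysicalDevice * out, uint32_t max_out) {
    uint32_t count = 0;
    vkEnumeratePhysicalDevices(instance, &count, NULL);
    if (count > 16) {
        count = 16;
    }
    VkPhysicalDevice devices[16];
    vkEnumeratePhysicalDevices(instance, &count, devices);

    uint32_t n = 0;

    // first pass: collect every dedicated (discrete) GPU
    for (uint32_t i = 0; i < count && n < max_out; i++) {
        VkPhysicalDeviceProperties props;
        vkGetPhysicalDeviceProperties(devices[i], &props);
        if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) {
            out[n++] = devices[i];
        }
    }

    // fallback: no discrete GPU was found, accept integrated GPUs instead
    if (n == 0) {
        for (uint32_t i = 0; i < count && n < max_out; i++) {
            VkPhysicalDeviceProperties props;
            vkGetPhysicalDeviceProperties(devices[i], &props);
            if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) {
                out[n++] = devices[i];
            }
        }
    }

    return n;
}
```

With this ordering, an integrated GPU is only ever used when it is the only Vulkan-capable device, matching the default described in the commit message.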
Diffstat (limited to 'ggml.c')
-rw-r--r-- | ggml.c | 35 |
1 file changed, 0 insertions, 35 deletions
```diff
@@ -278,8 +278,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #include <Accelerate/Accelerate.h>
 #if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
 #include "ggml-opencl.h"
-#elif defined(GGML_USE_VULKAN)
-#include "ggml-vulkan.h"
 #endif
 #elif defined(GGML_USE_OPENBLAS)
 #if defined(GGML_BLAS_USE_MKL)
@@ -289,8 +287,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
 #endif
 #elif defined(GGML_USE_CLBLAST)
 #include "ggml-opencl.h"
-#elif defined(GGML_USE_VULKAN)
-#include "ggml-vulkan.h"
 #endif
 
 // floating point type used to accumulate sums
@@ -2717,8 +2713,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
 
 #if defined(GGML_USE_CLBLAST)
         ggml_cl_init();
-#elif defined(GGML_USE_VULKAN)
-        ggml_vk_init_cpu_assist();
 #endif
 
         ggml_setup_op_has_task_pass();
@@ -16128,20 +16122,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
         return;
     }
 
-#if defined(GGML_USE_VULKAN)
-    const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
-#ifdef GGML_VULKAN_CHECK_RESULTS
-    if (skip_cpu) {
-        ggml_vk_check_results_1_cpu_assist(params, tensor);
-    }
-#endif
-    if (skip_cpu) {
-        return;
-    }
-    GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
-    GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
-#endif // GGML_USE_VULKAN
-
     switch (tensor->op) {
         case GGML_OP_DUP:
             {
@@ -18617,17 +18597,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
         }
     }
 
-#ifdef GGML_USE_VULKAN
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        ggml_vk_preallocate_buffers_graph_cpu_assist(cgraph->nodes[i]);
-    }
-    ggml_vk_preallocate_buffers_cpu_assist();
-
-    for (int i = 0; i < cgraph->n_nodes; i++) {
-        ggml_vk_build_graph_cpu_assist(cgraph->nodes[i], i == cgraph->n_nodes - 1);
-    }
-#endif
-
     const int n_threads = cplan->n_threads;
 
     struct ggml_compute_state_shared state_shared = {
@@ -18684,10 +18653,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
         }
     }
 
-#ifdef GGML_USE_VULKAN
-    ggml_vk_graph_cleanup_cpu_assist();
-#endif
-
     // performance stats (graph)
    {
        int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles;
```
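
With the cpu-assist hooks removed from ggml.c, Vulkan offload goes through the generic ggml-backend interface instead of `ggml_graph_compute()`. The following is a minimal sketch assuming the ggml-backend API of this period (`ggml_backend_vk_init`, `ggml_backend_alloc_ctx_tensors`, `ggml_backend_graph_compute`); the wrapper `run_on_vulkan` is hypothetical and the exact headers may differ from a given build.

```c
// Minimal sketch (not from this commit): run a ggml graph through the
// ggml-backend Vulkan backend instead of the removed cpu-assist path.
// Assumes ctx was created with .no_alloc = true and the graph was built from
// tensors in ctx; inputs are uploaded with ggml_backend_tensor_set() after
// allocation (not shown).
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-vulkan.h"   // requires a build with the Vulkan backend enabled

static enum ggml_status run_on_vulkan(struct ggml_context * ctx, struct ggml_cgraph * graph) {
    ggml_backend_t backend = ggml_backend_vk_init(0);    // Vulkan device 0
    if (backend == NULL) {
        backend = ggml_backend_cpu_init();                // no usable Vulkan device: run on the CPU
    }

    // place the context's tensors in backend (e.g. device) memory
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);

    // the backend executes the graph; ggml_graph_compute() no longer has a Vulkan path
    enum ggml_status status = ggml_backend_graph_compute(backend, graph);

    ggml_backend_buffer_free(buf);
    ggml_backend_free(backend);
    return status;
}
```

This backend-driven route takes over the work done by the per-op `skip_cpu` checks and the preallocate/build/cleanup calls deleted in the diff above.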