author     0cc4m <picard12@live.de>       2024-03-29 17:29:21 +0100
committer  GitHub <noreply@github.com>    2024-03-29 17:29:21 +0100
commit     ba0c7c70ab5b15f1f2be7fb0dfbe0366dda30d6c (patch)
tree       041a10dd587c26c42171be18e0f587f1fca2feca /ggml.c
parent     d48ccf3ad4fea5b9ede209c7f40be65371987bfe (diff)
Vulkan k-quant mmq and ggml-backend offload functionality (#6155)
* Fix Vulkan no kv offload incoherence
* Add k-quant mul mat mat shaders
* Rework working buffer allocation, reduces vram use noticeably
  Clean up cpu assist code, replaced with ggml-backend offload function
* Default to all dedicated GPUs
* Add fallback for integrated GPUs if no dedicated GPUs are found
* Add debug info which device is allocating memory
* Fix Intel dequant issue
  Fix validation issue
* Fix Vulkan GGML_OP_GET_ROWS implementation
* Clean up merge artifacts
* Remove Vulkan warning
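For context, this is roughly what the ggml-backend offload path that replaces the removed *_cpu_assist hooks looks like from the caller's side. It is a minimal sketch, assuming a Vulkan-enabled build: the tensor shapes, the device index 0, and the example_vulkan_offload wrapper are made up for illustration, while the ggml_backend_* calls are the public ggml-backend / ggml-vulkan entry points.

// Illustrative sketch (not part of this commit): running a matmul through the
// Vulkan backend via ggml-backend instead of the removed *_cpu_assist path.
#include "ggml.h"
#include "ggml-alloc.h"
#include "ggml-backend.h"
#include "ggml-vulkan.h"

static void example_vulkan_offload(void) {
    // Initialize the Vulkan backend for device 0 (example value);
    // fall back to the CPU backend if no device is available.
    ggml_backend_t backend = ggml_backend_vk_init(0);
    if (backend == NULL) {
        backend = ggml_backend_cpu_init();
    }

    // Build the graph with no_alloc so tensor data lives in a backend buffer,
    // not in the ggml_context itself.
    struct ggml_init_params ip = {
        /*.mem_size   =*/ ggml_tensor_overhead() * 16 + ggml_graph_overhead(),
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,
    };
    struct ggml_context * ctx = ggml_init(ip);

    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
    struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 64);
    struct ggml_tensor * c = ggml_mul_mat(ctx, a, b);

    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, c);

    // Allocate all tensors in a buffer owned by the chosen backend.
    ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend);

    // ... upload inputs with ggml_backend_tensor_set(a, ...), then run:
    ggml_backend_graph_compute(backend, gf);

    ggml_backend_buffer_free(buf);
    ggml_free(ctx);
    ggml_backend_free(backend);
}

With this model the backend decides where tensors live and how the graph is executed, so ggml.c no longer needs to special-case Vulkan, which is why the #ifdef GGML_USE_VULKAN blocks in the diff below are removed.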
Diffstat (limited to 'ggml.c')
-rw-r--r--   ggml.c   35
1 file changed, 0 insertions, 35 deletions
diff --git a/ggml.c b/ggml.c
index 81100e27..7471e792 100644
--- a/ggml.c
+++ b/ggml.c
@@ -278,8 +278,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
#include <Accelerate/Accelerate.h>
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
#include "ggml-opencl.h"
-#elif defined(GGML_USE_VULKAN)
-#include "ggml-vulkan.h"
#endif
#elif defined(GGML_USE_OPENBLAS)
#if defined(GGML_BLAS_USE_MKL)
@@ -289,8 +287,6 @@ inline static void * ggml_calloc(size_t num, size_t size) {
#endif
#elif defined(GGML_USE_CLBLAST)
#include "ggml-opencl.h"
-#elif defined(GGML_USE_VULKAN)
-#include "ggml-vulkan.h"
#endif
// floating point type used to accumulate sums
@@ -2717,8 +2713,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) {
#if defined(GGML_USE_CLBLAST)
ggml_cl_init();
-#elif defined(GGML_USE_VULKAN)
- ggml_vk_init_cpu_assist();
#endif
ggml_setup_op_has_task_pass();
@@ -16128,20 +16122,6 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
return;
}
-#if defined(GGML_USE_VULKAN)
- const bool skip_cpu = ggml_vk_compute_forward_cpu_assist(params, tensor);
-#ifdef GGML_VULKAN_CHECK_RESULTS
- if (skip_cpu) {
- ggml_vk_check_results_1_cpu_assist(params, tensor);
- }
-#endif
- if (skip_cpu) {
- return;
- }
- GGML_ASSERT(tensor->src[0] == NULL || tensor->src[0]->backend == GGML_BACKEND_TYPE_CPU);
- GGML_ASSERT(tensor->src[1] == NULL || tensor->src[1]->backend == GGML_BACKEND_TYPE_CPU);
-#endif // GGML_USE_VULKAN
-
switch (tensor->op) {
case GGML_OP_DUP:
{
@@ -18617,17 +18597,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
}
}
-#ifdef GGML_USE_VULKAN
- for (int i = 0; i < cgraph->n_nodes; i++) {
- ggml_vk_preallocate_buffers_graph_cpu_assist(cgraph->nodes[i]);
- }
- ggml_vk_preallocate_buffers_cpu_assist();
-
- for (int i = 0; i < cgraph->n_nodes; i++) {
- ggml_vk_build_graph_cpu_assist(cgraph->nodes[i], i == cgraph->n_nodes - 1);
- }
-#endif
-
const int n_threads = cplan->n_threads;
struct ggml_compute_state_shared state_shared = {
@@ -18684,10 +18653,6 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
}
}
-#ifdef GGML_USE_VULKAN
- ggml_vk_graph_cleanup_cpu_assist();
-#endif
-
// performance stats (graph)
{
int64_t perf_cycles_cur = ggml_perf_cycles() - perf_start_cycles;