From 2bf8d0f7c4cc1235755ad06961ca761e458c5e55 Mon Sep 17 00:00:00 2001
From: slaren
Date: Mon, 18 Mar 2024 11:03:04 +0100
Subject: backend : offload large batches to GPU (#6083)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* backend : offload large batches to GPU

* fix hip

* code cleanup

* fix CUDA split buffers

* Update ggml-backend-impl.h

Co-authored-by: Johannes Gäßler

* cuda : fix memset without set_device

* imatrix : remove sched affix from weight names

* sched : add a new split if the current one has too many inputs
reduce max inputs per split
more cleanup

* update backends

ggml-ci

---------

Co-authored-by: Johannes Gäßler
---
 examples/llama-bench/llama-bench.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'examples/llama-bench/llama-bench.cpp')

diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index 32eea786..4cb23080 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -114,10 +114,10 @@ static std::string get_cpu_info() {
 static std::string get_gpu_info() {
     std::string id;
 #ifdef GGML_USE_CUBLAS
-    int count = ggml_cuda_get_device_count();
+    int count = ggml_backend_cuda_get_device_count();
     for (int i = 0; i < count; i++) {
         char buf[128];
-        ggml_cuda_get_device_description(i, buf, sizeof(buf));
+        ggml_backend_cuda_get_device_description(i, buf, sizeof(buf));
         id += buf;
         if (i < count - 1) {
             id += "/";
-- 
cgit v1.2.3