summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--common/common.cpp1
-rw-r--r--examples/llama-bench/llama-bench.cpp15
-rw-r--r--ggml.c11
-rw-r--r--ggml.h1
-rw-r--r--llama.cpp5
5 files changed, 23 insertions, 10 deletions
diff --git a/common/common.cpp b/common/common.cpp
index 28801367..0dd1c50c 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1521,6 +1521,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l
fprintf(stream, "cpu_has_avx512_vnni: %s\n", ggml_cpu_has_avx512_vnni() ? "true" : "false");
fprintf(stream, "cpu_has_cublas: %s\n", ggml_cpu_has_cublas() ? "true" : "false");
fprintf(stream, "cpu_has_clblast: %s\n", ggml_cpu_has_clblast() ? "true" : "false");
+ fprintf(stream, "cpu_has_kompute: %s\n", ggml_cpu_has_kompute() ? "true" : "false");
fprintf(stream, "cpu_has_fma: %s\n", ggml_cpu_has_fma() ? "true" : "false");
fprintf(stream, "cpu_has_gpublas: %s\n", ggml_cpu_has_gpublas() ? "true" : "false");
fprintf(stream, "cpu_has_neon: %s\n", ggml_cpu_has_neon() ? "true" : "false");
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index f239415d..542cc7bb 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -563,6 +563,7 @@ struct test {
static const bool cuda;
static const bool opencl;
static const bool vulkan;
+ static const bool kompute;
static const bool metal;
static const bool gpu_blas;
static const bool blas;
@@ -647,6 +648,9 @@ struct test {
if (vulkan) {
return "Vulkan";
}
+ if (kompute) {
+ return "Kompute";
+ }
if (metal) {
return "Metal";
}
@@ -662,7 +666,7 @@ struct test {
static const std::vector<std::string> & get_fields() {
static const std::vector<std::string> fields = {
"build_commit", "build_number",
- "cuda", "opencl", "vulkan", "metal", "gpu_blas", "blas",
+ "cuda", "opencl", "vulkan", "kompute", "metal", "gpu_blas", "blas",
"cpu_info", "gpu_info",
"model_filename", "model_type", "model_size", "model_n_params",
"n_batch", "n_threads", "type_k", "type_v",
@@ -686,8 +690,9 @@ struct test {
field == "avg_ns" || field == "stddev_ns") {
return INT;
}
- if (field == "cuda" || field == "opencl" || field == "vulkan"|| field == "metal" || field == "gpu_blas" || field == "blas" ||
- field == "f16_kv" || field == "no_kv_offload" || field == "mul_mat_q") {
+ if (field == "cuda" || field == "opencl" || field == "vulkan" || field == "kompute" || field == "metal" ||
+ field == "gpu_blas" || field == "blas" || field == "f16_kv" || field == "no_kv_offload" ||
+ field == "mul_mat_q") {
return BOOL;
}
if (field == "avg_ts" || field == "stddev_ts") {
@@ -714,7 +719,8 @@ struct test {
}
std::vector<std::string> values = {
build_commit, std::to_string(build_number),
- std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
+ std::to_string(cuda), std::to_string(opencl), std::to_string(vulkan), std::to_string(kompute),
+ std::to_string(metal), std::to_string(gpu_blas), std::to_string(blas),
cpu_info, gpu_info,
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
@@ -743,6 +749,7 @@ const int test::build_number = LLAMA_BUILD_NUMBER;
const bool test::cuda = !!ggml_cpu_has_cublas();
const bool test::opencl = !!ggml_cpu_has_clblast();
const bool test::vulkan = !!ggml_cpu_has_vulkan();
+const bool test::kompute = !!ggml_cpu_has_kompute();
const bool test::metal = !!ggml_cpu_has_metal();
const bool test::gpu_blas = !!ggml_cpu_has_gpublas();
const bool test::blas = !!ggml_cpu_has_blas();
diff --git a/ggml.c b/ggml.c
index a7a9ea31..b2c8baaa 100644
--- a/ggml.c
+++ b/ggml.c
@@ -20473,6 +20473,14 @@ int ggml_cpu_has_vulkan(void) {
#endif
}
+int ggml_cpu_has_kompute(void) { // compile-time feature query: 1 if GGML_USE_KOMPUTE was defined for this build, else 0
+#if defined(GGML_USE_KOMPUTE)
+ return 1;
+#else
+ return 0;
+#endif
+}
+
int ggml_cpu_has_sycl(void) {
#if defined(GGML_USE_SYCL)
return 1;
@@ -20482,7 +20490,8 @@ int ggml_cpu_has_sycl(void) {
}
int ggml_cpu_has_gpublas(void) {
- return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_sycl();
+ return ggml_cpu_has_cublas() || ggml_cpu_has_clblast() || ggml_cpu_has_vulkan() || ggml_cpu_has_kompute() ||
+ ggml_cpu_has_sycl(); // nonzero if any of the cuBLAS, CLBlast, Vulkan, Kompute or SYCL queries is nonzero
}
int ggml_cpu_has_sse3(void) {
diff --git a/ggml.h b/ggml.h
index bf782e6a..afc87b84 100644
--- a/ggml.h
+++ b/ggml.h
@@ -2266,6 +2266,7 @@ extern "C" {
GGML_API int ggml_cpu_has_cublas (void);
GGML_API int ggml_cpu_has_clblast (void);
GGML_API int ggml_cpu_has_vulkan (void);
+ GGML_API int ggml_cpu_has_kompute (void);
GGML_API int ggml_cpu_has_gpublas (void);
GGML_API int ggml_cpu_has_sse3 (void);
GGML_API int ggml_cpu_has_ssse3 (void);
diff --git a/llama.cpp b/llama.cpp
index 7b9a5c07..a490eeab 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6878,11 +6878,6 @@ static int llama_decode_internal(
n_threads = std::min(4, n_threads);
}
- const bool fully_offloaded = model.n_gpu_layers >= (int) hparams.n_layer + 1;
- if ((ggml_cpu_has_cublas() || ggml_cpu_has_vulkan()) && fully_offloaded) {
- n_threads = 1;
- }
-
#ifdef GGML_USE_MPI
const int64_t n_layer = hparams.n_layer;
ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);