summary | refs | log | tree | commit | diff
path: root/ggml.c
diff options
context:
space:
mode:
author: 0cc4m <picard12@live.de> 2024-01-26 23:07:32 +0100
committer: GitHub <noreply@github.com> 2024-01-26 23:07:32 +0100
commit: a1d6df129bcd3d42cda38c09217d8d4ec4ea3bdd (patch)
tree: a1952417e976872146db04e46cab11e79745b0dc /ggml.c
parent: bbe7c56c9993af86aa2d84cbe1fd69e1b4300cea (diff)
Add OpenCL add kernel (#5151)
* Add OpenCL add kernel
* Put add kernel into different string to stay within MSVC string length limit, disable float16 support due to bad results
Diffstat (limited to 'ggml.c')
-rw-r--r-- ggml.c 11
1 files changed, 11 insertions, 0 deletions
diff --git a/ggml.c b/ggml.c
index ef6fd8ca..8f57003e 100644
--- a/ggml.c
+++ b/ggml.c
@@ -7207,6 +7207,17 @@ static void ggml_compute_forward_add_f32(
const int ith = params->ith;
const int nth = params->nth;
+#ifdef GGML_USE_CLBLAST
+ if (src1->backend == GGML_BACKEND_GPU) {
+ // TODO: OpenCL kernel support full broadcast
+ GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
+ if (ith == 0) {
+ ggml_cl_add(src0, src1, dst);
+ }
+ return;
+ }
+#endif
+
const int nr = ggml_nrows(src0);
GGML_TENSOR_BINARY_OP_LOCALS