From a1d6df129bcd3d42cda38c09217d8d4ec4ea3bdd Mon Sep 17 00:00:00 2001
From: 0cc4m
Date: Fri, 26 Jan 2024 23:07:32 +0100
Subject: Add OpenCL add kernel (#5151)

* Add OpenCL add kernel

* Put add kernel into different string to stay within MSVC string length limit, disable float16 support due to bad results
---
 ggml.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'ggml.c')

diff --git a/ggml.c b/ggml.c
index ef6fd8ca..8f57003e 100644
--- a/ggml.c
+++ b/ggml.c
@@ -7207,6 +7207,17 @@ static void ggml_compute_forward_add_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
+#ifdef GGML_USE_CLBLAST
+    if (src1->backend == GGML_BACKEND_GPU) {
+        // TODO: OpenCL kernel support full broadcast
+        GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
+        if (ith == 0) {
+            ggml_cl_add(src0, src1, dst);
+        }
+        return;
+    }
+#endif
+
     const int nr = ggml_nrows(src0);
 
     GGML_TENSOR_BINARY_OP_LOCALS
-- 
cgit v1.2.3