diff options
author | 0cc4m <picard12@live.de> | 2024-01-26 23:07:32 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-26 23:07:32 +0100 |
commit | a1d6df129bcd3d42cda38c09217d8d4ec4ea3bdd (patch) | |
tree | a1952417e976872146db04e46cab11e79745b0dc /ggml.c | |
parent | bbe7c56c9993af86aa2d84cbe1fd69e1b4300cea (diff) |
Add OpenCL add kernel (#5151)
* Add OpenCL add kernel
* Put add kernel into different string to stay within MSVC string length limit, disable float16 support due to bad results
Diffstat (limited to 'ggml.c')
-rw-r--r-- | ggml.c | 11 |
1 file changed, 11 insertions, 0 deletions
```diff
@@ -7207,6 +7207,17 @@ static void ggml_compute_forward_add_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
+#ifdef GGML_USE_CLBLAST
+    if (src1->backend == GGML_BACKEND_GPU) {
+        // TODO: OpenCL kernel support full broadcast
+        GGML_ASSERT(ggml_can_repeat_rows(src1, src0));
+        if (ith == 0) {
+            ggml_cl_add(src0, src1, dst);
+        }
+        return;
+    }
+#endif
+
     const int nr = ggml_nrows(src0);
 
     GGML_TENSOR_BINARY_OP_LOCALS
```