summaryrefslogtreecommitdiff
path: root/ggml-cuda/unary.cuh
diff options
context:
space:
mode:
author: Calvin Laurenson <calvin@laurenson.dev> 2024-06-16 15:23:04 -0700
committer: GitHub <noreply@github.com> 2024-06-17 00:23:04 +0200
commit: 43b35e38ba371f9a7faa6dca4c5d1e8f698ffd87 (patch)
tree: 11f250899027f3249c9ee15ffaff2048c9b81268 /ggml-cuda/unary.cuh
parent: 19b7a836f6658e18e973af532a5cc6ad6b3a27f8 (diff)
Add support for sqrt on CUDA (#7953)
* cuda sqrt support
* enable cuda in pca
* fix comments in pca
* add test
* add sqrt to ggml_backend_cuda_supports_op
* fix test
* new line
* Use F32 sqrtf instead of F64 sqrt
  Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
---------
Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
Diffstat (limited to 'ggml-cuda/unary.cuh')
-rw-r--r-- ggml-cuda/unary.cuh | 3
1 files changed, 3 insertions, 0 deletions
diff --git a/ggml-cuda/unary.cuh b/ggml-cuda/unary.cuh
index a1d07c04..4cfb0479 100644
--- a/ggml-cuda/unary.cuh
+++ b/ggml-cuda/unary.cuh
@@ -8,6 +8,7 @@
#define CUDA_HARDSIGMOID_BLOCK_SIZE 256
#define CUDA_HARDSWISH_BLOCK_SIZE 256
#define CUDA_SQR_BLOCK_SIZE 256
+#define CUDA_SQRT_BLOCK_SIZE 256
void ggml_cuda_op_gelu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
@@ -28,3 +29,5 @@ void ggml_cuda_op_hardswish(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_leaky_relu(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
void ggml_cuda_op_sqr(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
+
+void ggml_cuda_op_sqrt(ggml_backend_cuda_context & ctx, ggml_tensor * dst);