ggml : support CUDA's half type for aarch64(#1455) (#2670)

* ggml: support CUDA's half type for aarch64(#1455)
  support CUDA's half type for aarch64 in ggml_fp16_t definition
* ggml: use __CUDACC__ to recognise nvcc compiler
author: Kylin <56434533+KyL0N@users.noreply.github.com> 2023-08-22 15:14:23 +0800
committer: GitHub <noreply@github.com> 2023-08-22 10:14:23 +0300
commit: 1e3bc523d8053a77df3ac7126a84d0297ee97ef6 (patch)
tree: 4fcee8fbe2d776a99d44a4c6beee727e858c5b6a
parent: 14b1d7e6f720dee41ce5a826376df738096d9033 (diff)
1 file changed, 3 insertions, 2 deletions
diff --git a/ggml.h b/ggml.h
index 544ad2d1..0ec7ec5b 100644
--- a/ggml.h
+++ b/ggml.h
@@ -259,8 +259,9 @@
 extern "C" {
 #endif
 
-#ifdef __ARM_NEON
-    // we use the built-in 16-bit float type
+#if defined(__ARM_NEON) && defined(__CUDACC__)
+    typedef half ggml_fp16_t;
+#elif defined(__ARM_NEON)
     typedef __fp16 ggml_fp16_t;
 #else
     typedef uint16_t ggml_fp16_t;
author	Kylin <56434533+KyL0N@users.noreply.github.com>	2023-08-22 15:14:23 +0800
committer	GitHub <noreply@github.com>	2023-08-22 10:14:23 +0300
commit	1e3bc523d8053a77df3ac7126a84d0297ee97ef6 (patch)
tree	4fcee8fbe2d776a99d44a4c6beee727e858c5b6a
parent	14b1d7e6f720dee41ce5a826376df738096d9033 (diff)