summaryrefslogtreecommitdiff
path: root/ggml/src/ggml-cuda/cp-async.cuh
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src/ggml-cuda/cp-async.cuh')
-rw-r--r--ggml/src/ggml-cuda/cp-async.cuh10
1 files changed, 10 insertions, 0 deletions
diff --git a/ggml/src/ggml-cuda/cp-async.cuh b/ggml/src/ggml-cuda/cp-async.cuh
index ecb65999..a87dc247 100644
--- a/ggml/src/ggml-cuda/cp-async.cuh
+++ b/ggml/src/ggml-cuda/cp-async.cuh
@@ -2,6 +2,16 @@
#include "common.cuh"
+static __device__ __forceinline__ unsigned int ggml_cuda_cvta_generic_to_shared(void * generic_ptr) { // Device-side wrapper around __cvta_generic_to_shared: takes a generic pointer and returns the converted address as unsigned int.
+#ifdef CP_ASYNC_AVAILABLE
+ return __cvta_generic_to_shared(generic_ptr); // the intrinsic's result is implicitly converted to the declared unsigned int return type
+#else
+ GGML_UNUSED(generic_ptr); // the parameter is not otherwise referenced in this branch
+ NO_DEVICE_CODE; // macro defined outside this view; presumably marks the path as unsupported when CP_ASYNC_AVAILABLE is not defined - TODO confirm
+ return 0; // fallback value so the function still returns on this branch
+#endif // CP_ASYNC_AVAILABLE
+}
+
// Copies data from global to shared memory, cg == cache global.
// Both the src and dst pointers must be aligned to 16 bytes.
// Shared memory uses 32 bit addressing, the pointer is passed as unsigned int.