summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com> 2024-01-07 01:52:42 -0500
committer GitHub <noreply@github.com> 2024-01-07 08:52:42 +0200
commit 63ee677efd92060b14894b984597c62e3742b8da (patch)
tree 173e19abd24941ef5c007d82b9d88eeb0ddb1053
parent 67984921a70a7e680a24494aeb7575a66e90685d (diff)
ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (#4787)
-rw-r--r-- ggml-cuda.cu 2
1 files changed, 1 insertions, 1 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 10c21615..54b266be 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
#if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
c = __builtin_amdgcn_sdot4(a, b, c, false);
-#elif defined(__gfx1100__)
+#elif defined(RDNA3)
c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
#elif defined(__gfx1010__) || defined(__gfx900__)
int tmp1;