diff options
author | Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com> | 2024-01-07 01:52:42 -0500 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-01-07 08:52:42 +0200 |
commit | 63ee677efd92060b14894b984597c62e3742b8da (patch) | |
tree | 173e19abd24941ef5c007d82b9d88eeb0ddb1053 | |
parent | 67984921a70a7e680a24494aeb7575a66e90685d (diff) |
ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (#4787)
-rw-r--r-- | ggml-cuda.cu | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 10c21615..54b266be 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) {
 static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) {
 #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__)
     c = __builtin_amdgcn_sdot4(a, b, c, false);
-#elif defined(__gfx1100__)
+#elif defined(RDNA3)
     c = __builtin_amdgcn_sudot4( true, a, true, b, c, false);
 #elif defined(__gfx1010__) || defined(__gfx900__)
     int tmp1;
```