CUDA: generalize FP16 fattn vec kernel (#7061)

* CUDA: generalize FP16 fattn vec kernel
* disable unsupported head sizes for AMD in test
* try AMD fix
* fix batch size 2-8
* partially revert changes
author: Johannes Gäßler <johannesg@5d6.de> 2024-05-09 14:32:02 +0200
committer: GitHub <noreply@github.com> 2024-05-09 14:32:02 +0200
commit: a743d76a01f23038b2c85af1e9048ee836767b44 (patch)
tree: 8182fc85cb9fd055bc9c8268d5d4a05bcf87f57a /tests/test-backend-ops.cpp
parent: f31ec120bc36c6270e4948e6a065a7c4cfa0c404 (diff)
1 file changed, 4 insertions, 0 deletions
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 41718e00..0d66de5d 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -2175,7 +2175,11 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_timestep_embedding());
     test_cases.emplace_back(new test_leaky_relu());
 
+#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
+    for (int hs : { 64, 128, }) { // other head sizes not implemented
+#else
     for (int hs : { 64, 80, 128, 256, }) {
+#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
         for (int nh : { 32, }) {
             for (int kv : { 512, 1024, }) {
                 for (int nb : { 1, 2, 4, 8, }) {
author	Johannes Gäßler <johannesg@5d6.de>	2024-05-09 14:32:02 +0200
committer	GitHub <noreply@github.com>	2024-05-09 14:32:02 +0200
commit	a743d76a01f23038b2c85af1e9048ee836767b44 (patch)
tree	8182fc85cb9fd055bc9c8268d5d4a05bcf87f57a /tests/test-backend-ops.cpp
parent	f31ec120bc36c6270e4948e6a065a7c4cfa0c404 (diff)