From dc685be46622a8fabfd57cfa804237c8f15679b8 Mon Sep 17 00:00:00 2001
From: Johannes Gäßler
Date: Sun, 12 May 2024 19:40:45 +0200
Subject: CUDA: add FP32 FlashAttention vector kernel (#7188)

* CUDA: add FP32 FlashAttention vector kernel

* fixup! CUDA: add FP32 FlashAttention vector kernel

* fixup! fixup! CUDA: add FP32 FlashAttention vector kernel

* fixup! fixup! fixup! CUDA: add FP32 FlashAttention vector kernel
---
 tests/test-backend-ops.cpp | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'tests')

diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 731788b9..45a2cb85 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -2,6 +2,7 @@
 #include
 #include
 #include
+#include

 #include
 #include
@@ -2173,11 +2174,7 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op
     test_cases.emplace_back(new test_timestep_embedding());
     test_cases.emplace_back(new test_leaky_relu());

-#if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
-    for (int hs : { 64, 128, }) { // other head sizes not implemented
-#else
     for (int hs : { 64, 80, 128, 256, }) {
-#endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__)
         for (float max_bias : {0.0f, 8.0f}) {
             for (int nh : { 32, }) {
                 for (int kv : { 512, 1024, }) {
-- 
cgit v1.2.3
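For context on what the test change above is exercising: the patch drops the HIP-only restriction to head sizes { 64, 128 }, so HIP builds now sweep { 64, 80, 128, 256 } like CUDA, and max_bias = 8.0f covers the ALiBi path. FlashAttention-style kernels evaluate softmax(scale * Q.K^T).V in one streaming pass over the KV cache, tracking a running maximum and running denominator instead of materializing the attention matrix. The sketch below is a minimal scalar C++ model of that FP32 online-softmax recurrence for a single query row; it is not the ggml-cuda kernel added by this commit, and the function name, memory layouts, and the omission of the max_bias/ALiBi term are assumptions made for brevity.

// Minimal, self-contained C++ sketch of the FP32 online-softmax recurrence
// that a FlashAttention "vector" kernel evaluates for a single query row.
// NOT the ggml-cuda kernel from this patch: names, layouts, and the omission
// of the ALiBi max_bias term are illustrative assumptions.

#include <cmath>
#include <cstdio>
#include <vector>

// out = softmax(scale * q.K^T) . V, computed in one pass over the KV cache.
static std::vector<float> flash_attn_vec_f32_ref(
        const std::vector<float> & q,   // [head_size]
        const std::vector<float> & k,   // [kv_len * head_size], row-major
        const std::vector<float> & v,   // [kv_len * head_size], row-major
        int head_size, int kv_len, float scale) {
    std::vector<float> acc(head_size, 0.0f); // running weighted sum of V rows
    float m = -INFINITY;                     // running maximum of the logits
    float s = 0.0f;                          // running softmax denominator

    for (int i = 0; i < kv_len; ++i) {
        // logit for KV position i: scale * dot(q, k_i)
        float t = 0.0f;
        for (int d = 0; d < head_size; ++d) {
            t += q[d]*k[i*head_size + d];
        }
        t *= scale;

        // online softmax: when the running max grows, rescale what has been
        // accumulated so far instead of re-reading earlier KV positions
        const float m_new = std::fmax(m, t);
        const float c_old = std::exp(m - m_new); // correction for old terms
        const float p     = std::exp(t - m_new); // weight of position i
        s = s*c_old + p;
        for (int d = 0; d < head_size; ++d) {
            acc[d] = acc[d]*c_old + p*v[i*head_size + d];
        }
        m = m_new;
    }

    for (int d = 0; d < head_size; ++d) {
        acc[d] /= s; // final normalization by the softmax denominator
    }
    return acc;
}

int main() {
    const int head_size = 4, kv_len = 3;
    const std::vector<float> q = {1.0f, 0.0f, 0.0f, 0.0f};
    const std::vector<float> k(kv_len*head_size, 0.5f);
    const std::vector<float> v(kv_len*head_size, 1.0f);
    const float scale = 1.0f/std::sqrt((float) head_size);

    // all logits are equal, so the output must equal the (constant) V rows
    for (float x : flash_attn_vec_f32_ref(q, k, v, head_size, kv_len, scale)) {
        printf("%.3f ", x); // prints 1.000 four times
    }
    printf("\n");
    return 0;
}

With the #if removed, a HIP build runs the same head-size sweep as CUDA. Assuming the test binary's usual command-line flags, an invocation along the lines of `./bin/test-backend-ops test -o FLASH_ATTN_EXT` would restrict the run to these flash-attention cases.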