summary | refs | log | tree | commit | diff
path: root/ggml/src
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src')
-rw-r--r-- ggml/src/CMakeLists.txt 12
-rw-r--r-- ggml/src/iqk/iqk_flash_attn.cpp 2
-rw-r--r-- ggml/src/iqk/iqk_mul_mat.cpp 2
3 files changed, 12 insertions, 4 deletions
diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt
index 4f4337c2..14650d03 100644
--- a/ggml/src/CMakeLists.txt
+++ b/ggml/src/CMakeLists.txt
@@ -260,9 +260,15 @@ if (GGML_IQK_MUL_MAT)
add_compile_definitions(GGML_USE_IQK_MULMAT)
set(GGML_SOURCES_IQK_MM iqk/iqk_mul_mat.cpp iqk/iqk_flash_attn.cpp)
set(GGML_HEADERS_IQK_MM iqk/iqk_mul_mat.h iqk/iqk_flash_impl.h)
- if (GGML_IQK_FA_ALL_QUANTS)
- message(STATUS "Including all IQK FA kernels")
- add_compile_definitions(GGML_IQK_FA_ALL_QUANTS)
+ if (GGML_IQK_FLASH_ATTENTION)
+ message(STATUS "Enabling IQK Flash Attention kernels")
+ add_compile_definitions(GGML_IQK_FLASH_ATTENTION)
+ if (GGML_IQK_FA_ALL_QUANTS)
+ message(STATUS "Including all IQK FA kernels")
+ add_compile_definitions(GGML_IQK_FA_ALL_QUANTS)
+ endif()
+ else()
+ message(STATUS "Disabling IQK Flash Attention kernels")
endif()
endif()
diff --git a/ggml/src/iqk/iqk_flash_attn.cpp b/ggml/src/iqk/iqk_flash_attn.cpp
index 610f18b7..9a974ae7 100644
--- a/ggml/src/iqk/iqk_flash_attn.cpp
+++ b/ggml/src/iqk/iqk_flash_attn.cpp
@@ -8,7 +8,7 @@
#include "iqk_mul_mat.h"
#include "iqk_flash_impl.h"
-#ifdef IQK_IMPLEMENT
+#if defined IQK_IMPLEMENT && defined GGML_IQK_FLASH_ATTENTION
#include <algorithm>
#include <cstdio>
diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp
index 654cc706..311554f4 100644
--- a/ggml/src/iqk/iqk_mul_mat.cpp
+++ b/ggml/src/iqk/iqk_mul_mat.cpp
@@ -15875,6 +15875,7 @@ void MulMat::relu(int n, const float * x, float * y) {
#endif
} // namespace
+#ifdef GGML_IQK_FLASH_ATTENTION
namespace {
template <int k_step>
@@ -18663,6 +18664,7 @@ bool iqk_flash_attn_impl(int int_type_k, // type of k
return true;
}
+#endif
#else // IQK_IMPLEMENT