summary | refs | log | tree | commit | diff
path: root/ggml-cuda/common.cuh
diff options
context:
space:
mode:
author: Johannes Gäßler <johannesg@5d6.de> 2024-06-20 14:39:21 +0200
committer: GitHub <noreply@github.com> 2024-06-20 14:39:21 +0200
commit: d50f8897a797a5a03f31228d1b5a7b8130ee1bc2 (patch)
tree: 9ee91b29378e35ff8f7b5071308c12d429f316f0 /ggml-cuda/common.cuh
parent: 2075a66a96cc1b04eabec7cf4b3051193d6f719e (diff)
CUDA: stream-k decomposition for MMQ (#8018)
* CUDA: stream-k decomposition for MMQ
* fix undefined memory reads for small matrices
Diffstat (limited to 'ggml-cuda/common.cuh')
-rw-r--r-- ggml-cuda/common.cuh 4
1 files changed, 2 insertions, 2 deletions
diff --git a/ggml-cuda/common.cuh b/ggml-cuda/common.cuh
index de7c2e43..5bd24ebe 100644
--- a/ggml-cuda/common.cuh
+++ b/ggml-cuda/common.cuh
@@ -652,8 +652,8 @@ static int get_mmq_x_max_host(const int cc) {
}
// Round rows to this value for --split-mode row:
-static int get_mmq_y_host(const int cc, const int mmq_x) {
- return cc >= CC_VOLTA && mmq_x >= 32 ? 128 : 64;
+static int get_mmq_y_host(const int cc) {
+ return cc >= CC_VOLTA ? 128 : 64;
}
//////////////////////