diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2024-06-20 14:39:21 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-20 14:39:21 +0200 |
commit | d50f8897a797a5a03f31228d1b5a7b8130ee1bc2 (patch) | |
tree | 9ee91b29378e35ff8f7b5071308c12d429f316f0 /ggml-cuda/common.cuh | |
parent | 2075a66a96cc1b04eabec7cf4b3051193d6f719e (diff) |
CUDA: stream-k decomposition for MMQ (#8018)
* CUDA: stream-k decomposition for MMQ
* fix undefined memory reads for small matrices
Diffstat (limited to 'ggml-cuda/common.cuh')
-rw-r--r-- | ggml-cuda/common.cuh | 4 |
1 files changed, 2 insertions, 2 deletions
```diff
diff --git a/ggml-cuda/common.cuh b/ggml-cuda/common.cuh
index de7c2e43..5bd24ebe 100644
--- a/ggml-cuda/common.cuh
+++ b/ggml-cuda/common.cuh
@@ -652,8 +652,8 @@ static int get_mmq_x_max_host(const int cc) {
 }
 
 // Round rows to this value for --split-mode row:
-static int get_mmq_y_host(const int cc, const int mmq_x) {
-    return cc >= CC_VOLTA && mmq_x >= 32 ? 128 : 64;
+static int get_mmq_y_host(const int cc) {
+    return cc >= CC_VOLTA ? 128 : 64;
 }
 
 //////////////////////
```