diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2024-06-20 14:39:21 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-20 14:39:21 +0200 |
commit | d50f8897a797a5a03f31228d1b5a7b8130ee1bc2 (patch) | |
tree | 9ee91b29378e35ff8f7b5071308c12d429f316f0 /ggml-cuda.cu | |
parent | 2075a66a96cc1b04eabec7cf4b3051193d6f719e (diff) |
CUDA: stream-k decomposition for MMQ (#8018)
* CUDA: stream-k decomposition for MMQ
* fix undefined memory reads for small matrices
Diffstat (limited to 'ggml-cuda.cu')
-rw-r--r-- | ggml-cuda.cu | 2 |
1 file changed, 1 insertion, 1 deletion
```diff
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index b8298ab2..f914efd7 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -635,7 +635,7 @@ static int64_t get_row_rounding(const std::array<float, GGML_CUDA_MAX_DEVICES> &
         }
 
         const int cc = ggml_cuda_info().devices[id].cc;
-        row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc, get_mmq_x_max_host(cc)));
+        row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc));
     }
     return row_rounding;
 }
```