summary | refs | log | tree | commit | diff
path: root/ggml-cuda.cu
diff options
context:
space:
mode:
author: Johannes Gäßler <johannesg@5d6.de> 2024-06-20 14:39:21 +0200
committer: GitHub <noreply@github.com> 2024-06-20 14:39:21 +0200
commit: d50f8897a797a5a03f31228d1b5a7b8130ee1bc2 (patch)
tree: 9ee91b29378e35ff8f7b5071308c12d429f316f0 /ggml-cuda.cu
parent: 2075a66a96cc1b04eabec7cf4b3051193d6f719e (diff)
CUDA: stream-k decomposition for MMQ (#8018)
* CUDA: stream-k decomposition for MMQ
* fix undefined memory reads for small matrices
Diffstat (limited to 'ggml-cuda.cu')
-rw-r--r-- ggml-cuda.cu 2
1 file changed, 1 insertion, 1 deletion
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index b8298ab2..f914efd7 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -635,7 +635,7 @@ static int64_t get_row_rounding(const std::array<float, GGML_CUDA_MAX_DEVICES> &
}
const int cc = ggml_cuda_info().devices[id].cc;
- row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc, get_mmq_x_max_host(cc)));
+ row_rounding = std::max(row_rounding, (int64_t)get_mmq_y_host(cc));
}
return row_rounding;
}