summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Johannes Gäßler <johannesg@5d6.de> 2023-12-23 09:16:33 +0100
committer GitHub <noreply@github.com> 2023-12-23 09:16:33 +0100
commit e0a4002273907b2c414b6b5442d99e08bfe2df35 (patch)
tree 6c9ecc8f515c9f0ececc609150be3cb467043498
parent 7082d24cec35e9ce9147535a2224dfc67ee0a78c (diff)
CUDA: fixed row rounding for 0 tensor splits (#4594)
-rw-r--r-- ggml-cuda.cu 8
1 files changed, 6 insertions, 2 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 7c2a834e..490081ca 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -7937,12 +7937,16 @@ static void ggml_cuda_op_mul_mat(
if (id != 0) {
row_low[id] = ne01*g_tensor_split[id];
- row_low[id] -= row_low[id] % rounding;
+ if (row_low[id] < ne01) {
+ row_low[id] -= row_low[id] % rounding;
+ }
}
if (id != g_device_count - 1) {
row_high[id] = ne01*g_tensor_split[id + 1];
- row_high[id] -= row_high[id] % rounding;
+ if (row_high[id] < ne01) {
+ row_high[id] -= row_high[id] % rounding;
+ }
}
}
}