summary | refs | log | tree | commit | diff
path: root/ggml-cuda/softmax.cu
diff options
context:
space:
mode:
Diffstat (limited to 'ggml-cuda/softmax.cu')
-rw-r--r-- ggml-cuda/softmax.cu | 1
1 files changed, 1 insertions, 0 deletions
diff --git a/ggml-cuda/softmax.cu b/ggml-cuda/softmax.cu
index ce64f2f2..c24abae1 100644
--- a/ggml-cuda/softmax.cu
+++ b/ggml-cuda/softmax.cu
@@ -130,6 +130,7 @@ static void soft_max_f32_cuda(const float * x, const T * mask, float * dst, cons
const float m0 = powf(2.0f, -(max_bias ) / n_head_log2);
const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2);
+ // FIXME: this limit could be raised by ~2-4x on Ampere or newer
if (shmem < ggml_cuda_info().devices[ggml_cuda_get_device()].smpb) {
switch (ncols_x) {
case 32: