summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Andrew Godfrey <AndrewGodfrey@users.noreply.github.com> 2023-11-17 00:01:15 -0800
committer: GitHub <noreply@github.com> 2023-11-17 10:01:15 +0200
commit: b83e149ec6264d078e6a47412e7347bf5c2bfcc9 (patch)
tree: 395a0764606ee5550a3ee9c2f6b2da9879f4bbec
parent: 4f447a48339977073a1af4f33ae873465ff64994 (diff)
cuda : get_row_rounding F32 (#4095)
* Fix #4017

* Update ggml-cuda.cu

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

* Update ggml-cuda.cu

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>

---------

Co-authored-by: Jared Van Bortel <cebtenzzre@gmail.com>
-rw-r--r-- ggml-cuda.cu 2
1 files changed, 2 insertions, 0 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 9aa61fe4..874ad9ac 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -6356,6 +6356,7 @@ static int64_t get_row_rounding(ggml_type type) {
case GGML_TYPE_Q8_0:
return max_compute_capability >= CC_RDNA2 ? 128 : 64;
case GGML_TYPE_F16:
+ case GGML_TYPE_F32:
return 1;
case GGML_TYPE_Q2_K:
return max_compute_capability >= CC_RDNA2 ? 128 : 32;
@@ -6378,6 +6379,7 @@ static int64_t get_row_rounding(ggml_type type) {
case GGML_TYPE_Q8_0:
return 64;
case GGML_TYPE_F16:
+ case GGML_TYPE_F32:
return 1;
case GGML_TYPE_Q2_K:
case GGML_TYPE_Q3_K: