summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Nexes the Elder <124105151+Nexesenex@users.noreply.github.com> 2025-05-18 16:36:41 +0200
committer: GitHub <noreply@github.com> 2025-05-18 17:36:41 +0300
commit: 2ec2229f2e9847d4e96bd7f163201810c8f8299a (patch)
tree: 252bc59c3588296093945cf8e7da85945cbb478a
parent: b3036a872f474beadf2df72d452ca7016db72aac (diff)
Forgotten MMQ ref and typo (#431)
-rw-r--r-- ggml/src/ggml-cuda/mmq.cuh | 3
1 file changed, 2 insertions, 1 deletion
diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
index 72fa9f13..7a51c514 100644
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -169,6 +169,7 @@ static constexpr __device__ int get_mmq_y_device() {
static constexpr __host__ __device__ tile_x_sizes mmq_get_dp4a_tile_x_sizes(ggml_type type, int mmq_y) {
switch (type) {
+ case GGML_TYPE_Q4_0 : return MMQ_DP4A_TXS_Q4_0;
case GGML_TYPE_Q4_1 : return MMQ_DP4A_TXS_Q4_1;
case GGML_TYPE_Q5_0 : return MMQ_DP4A_TXS_Q8_0;
case GGML_TYPE_Q5_1 : return MMQ_DP4A_TXS_Q8_1;
@@ -3363,7 +3364,7 @@ static __global__ void mul_mat_q(
const int jt = kbc / (blocks_per_ne00*nty);
const int it = (kbc - jt*(blocks_per_ne00*nty)) / blocks_per_ne00;
- constexpr bool fixup = true; // Last index writes it data to fixup buffer to avoid data races with other blocks.
+ constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks.
mul_mat_q_process_tile<type, mmq_x, nwarps, need_check, fixup>
(x, yc, dst, tmp_fixup, ne00, ne01, stride01, ne10, ne11, stride11, ne0,
it, jt, kb0_start, kb0_stop);