From 2ec2229f2e9847d4e96bd7f163201810c8f8299a Mon Sep 17 00:00:00 2001
From: Nexes the Elder <124105151+Nexesenex@users.noreply.github.com>
Date: Sun, 18 May 2025 16:36:41 +0200
Subject: Forgotten MMQ ref and typo (#431)

---
 ggml/src/ggml-cuda/mmq.cuh | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'ggml/src')

diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh
index 72fa9f13..7a51c514 100644
--- a/ggml/src/ggml-cuda/mmq.cuh
+++ b/ggml/src/ggml-cuda/mmq.cuh
@@ -169,6 +169,7 @@ static constexpr __device__ int get_mmq_y_device() {
 static constexpr __host__ __device__ tile_x_sizes mmq_get_dp4a_tile_x_sizes(ggml_type type, int mmq_y) {
     switch (type) {
+        case GGML_TYPE_Q4_0 : return MMQ_DP4A_TXS_Q4_0;
         case GGML_TYPE_Q4_1 : return MMQ_DP4A_TXS_Q4_1;
         case GGML_TYPE_Q5_0 : return MMQ_DP4A_TXS_Q8_0;
         case GGML_TYPE_Q5_1 : return MMQ_DP4A_TXS_Q8_1;
@@ -3363,7 +3364,7 @@ static __global__ void mul_mat_q(
     const int jt = kbc / (blocks_per_ne00*nty);
     const int it = (kbc - jt*(blocks_per_ne00*nty)) / blocks_per_ne00;
-    constexpr bool fixup = true; // Last index writes it data to fixup buffer to avoid data races with other blocks.
+    constexpr bool fixup = true; // Last index writes its data to fixup buffer to avoid data races with other blocks.
     mul_mat_q_process_tile
         (x, yc, dst, tmp_fixup, ne00, ne01, stride01, ne10, ne11, stride11, ne0,
         it, jt, kb0_start, kb0_stop);
--
cgit v1.2.3