diff options
author | Kawrakow <iwankawrakow@gmail.com> | 2025-05-23 18:25:11 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-05-23 18:25:11 +0300 |
commit | 9fb82af3a80f8b1774afd198e981460dc23b41dc (patch) | |
tree | 36334806b082534e28bd5d87811729286e51f96e /ggml/src | |
parent | 6b12c2e7e8c9f8e6925ff8d9e7ebd5231bb9e6ef (diff) |
Fix bug in MMVQ kernel (#446)
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Diffstat (limited to 'ggml/src')
-rw-r--r-- | ggml/src/ggml-cuda/mmvq.cu | 5 |
1 files changed, 4 insertions, 1 deletions
```diff
diff --git a/ggml/src/ggml-cuda/mmvq.cu b/ggml/src/ggml-cuda/mmvq.cu
index d0477835..30a6a58b 100644
--- a/ggml/src/ggml-cuda/mmvq.cu
+++ b/ggml/src/ggml-cuda/mmvq.cu
@@ -72,10 +72,13 @@ static __device__ void mul_mat_vec_q(
 
     constexpr vec_dot_q_cuda_t vec_dot_q_cuda = get_vec_dot_q_cuda(type);
 
+    //int64_t rows_per_cuda_block = ggml_cuda_info().devices[id].cc < CC_RDNA2 ?
+    // ncols_y < 4 ? 1 : 2 : 1;
+
 #if defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && (defined(RDNA2) || defined(RDNA3))
     constexpr int rows_per_cuda_block = 1;
 #else
-    constexpr int rows_per_cuda_block = ncols_y == 1 ? 1 : 2;
+    constexpr int rows_per_cuda_block = ncols_y < 4 ? 1 : 2;
 #endif // defined(GGML_USE_HIPBLAS) && defined(__HIP_PLATFORM_AMD__) && !defined(RDNA2) && !defined(RDNA3)
 
     const int tid = WARP_SIZE*threadIdx.y + threadIdx.x;
```