summaryrefslogtreecommitdiff
path: root/ggml-cuda/dmmv.cu
diff options
context:
space:
mode:
Diffstat (limited to 'ggml-cuda/dmmv.cu')
-rw-r--r--ggml-cuda/dmmv.cu6
1 files changed, 3 insertions, 3 deletions
diff --git a/ggml-cuda/dmmv.cu b/ggml-cuda/dmmv.cu
index 0b17e3cb..7313e3e1 100644
--- a/ggml-cuda/dmmv.cu
+++ b/ggml-cuda/dmmv.cu
@@ -565,7 +565,7 @@ static __global__ void dequantize_mul_mat_vec_q6_k(const void * __restrict__ vx,
}
}
-static __device__ void convert_f16(const void * vx, const int ib, const int iqs, dfloat2 & v){
+static __device__ void convert_f16(const void * vx, const int64_t ib, const int iqs, dfloat2 & v){
const half * x = (const half *) vx;
// automatic half -> float type cast if dfloat == float
@@ -577,7 +577,7 @@ template <int qk, int qr, dequantize_kernel_t dequantize_kernel>
static __global__ void dequantize_mul_mat_vec(const void * __restrict__ vx, const dfloat * __restrict__ y, float * __restrict__ dst, const int ncols, const int nrows) {
// qk = quantized weights per x block
// qr = number of quantized weights per data value in x block
- const int row = blockIdx.x*blockDim.y + threadIdx.y;
+ const int64_t row = (int64_t)blockIdx.x*blockDim.y + threadIdx.y;
if (row >= nrows) {
return;
@@ -598,7 +598,7 @@ static __global__ void dequantize_mul_mat_vec(const void * __restrict__ vx, cons
for (int i = 0; i < ncols; i += iter_stride) {
const int col = i + vals_per_iter*tid;
- const int ib = (row*ncols + col)/qk; // x block index
+ const int64_t ib = ((int64_t)row*ncols + col)/qk; // x block index
const int iqs = (col%qk)/qr; // x quant index
const int iybs = col - col%qk; // y block start index