diff options
Diffstat (limited to 'ggml-cuda.cu')
-rw-r--r-- | ggml-cuda.cu | 19 |
1 files changed, 5 insertions, 14 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 05e5d18a..0d599e20 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -9790,8 +9790,8 @@ static void ggml_cuda_mul_mat_id(const ggml_tensor * src0, const ggml_tensor * s // TODO: mmq/mmv support #endif - const int64_t nb11 = src1->nb[1]; - const int64_t nb1 = dst->nb[1]; + const size_t nb11 = src1->nb[1]; + const size_t nb1 = dst->nb[1]; const struct ggml_tensor * ids = src0; const int32_t id = ((int32_t *) dst->op_params)[0]; @@ -10304,15 +10304,11 @@ GGML_CALL static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t if (ggml_is_quantized(tensor->type)) { // initialize padding to 0 to avoid possible NaN values - int64_t row_low = 0; - int64_t row_high = ggml_nrows(tensor); - int64_t nrows_split = row_high - row_low; - - size_t original_size = ggml_nbytes_split(tensor, nrows_split); + size_t original_size = ggml_nbytes(tensor); size_t padded_size = ggml_backend_buft_get_alloc_size(buffer->buft, tensor); if (padded_size > original_size && tensor->view_src == nullptr) { - CUDA_CHECK(cudaMemsetAsync((char *)tensor->data + original_size, 0, padded_size - original_size, g_cudaStreams[ctx->device][0])); + CUDA_CHECK(cudaMemset((char *)tensor->data + original_size, 0, padded_size - original_size)); } } } @@ -10415,12 +10411,7 @@ GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alignment(ggml_backend } GGML_CALL static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_type_t buft, const ggml_tensor * tensor) { - int64_t row_low = 0; - int64_t row_high = ggml_nrows(tensor); - int64_t nrows_split = row_high - row_low; - - size_t size = ggml_nbytes_split(tensor, nrows_split); - + size_t size = ggml_nbytes(tensor); int64_t ne0 = tensor->ne[0]; if (ggml_is_quantized(tensor->type)) { |