diff options
Diffstat (limited to 'ggml-cuda.cu')
-rw-r--r-- | ggml-cuda.cu | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/ggml-cuda.cu b/ggml-cuda.cu index 019648bd..0a63c1ec 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -8898,6 +8898,12 @@ static void ggml_cuda_nop(const ggml_tensor * src0, const ggml_tensor * src1, gg (void) dst; } +static size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split) { + static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); + + return nrows_split*ggml_row_size(tensor->type, tensor->ne[0]); +} + void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) { const int64_t nrows = ggml_nrows(tensor); @@ -8947,8 +8953,7 @@ void ggml_cuda_transform_tensor(void * data, struct ggml_tensor * tensor) { // pad last row to a multiple of 512 elements to avoid out-of-bounds memory accesses if (ne0 % MATRIX_ROW_PADDING != 0) { - size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING) - * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type); + size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING); } char * buf; @@ -9485,8 +9490,7 @@ static size_t ggml_backend_cuda_buffer_type_get_alloc_size(ggml_backend_buffer_t if (ggml_is_quantized(tensor->type)) { if (ne0 % MATRIX_ROW_PADDING != 0) { - size += (MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING) - * ggml_type_size(tensor->type)/ggml_blck_size(tensor->type); + size += ggml_row_size(tensor->type, MATRIX_ROW_PADDING - ne0 % MATRIX_ROW_PADDING); } } |