summaryrefslogtreecommitdiff
path: root/ggml/src/ggml-cuda.cu
diff options
context:
space:
mode:
Diffstat (limited to 'ggml/src/ggml-cuda.cu')
-rw-r--r--ggml/src/ggml-cuda.cu21
1 files changed, 19 insertions, 2 deletions
diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
index da3dc334..ae7a55c6 100644
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -500,6 +500,14 @@ GGML_CALL static void ggml_backend_cuda_buffer_init_tensor(ggml_backend_buffer_t
}
}
+GGML_CALL static void ggml_backend_cuda_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) {
+ ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
+
+ ggml_cuda_set_device(ctx->device);
+ CUDA_CHECK(cudaMemsetAsync((char *)tensor->data + offset, value, size, cudaStreamPerThread));
+ CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
+}
+
GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
@@ -551,6 +559,7 @@ static ggml_backend_buffer_i ggml_backend_cuda_buffer_interface = {
/* .free_buffer = */ ggml_backend_cuda_buffer_free_buffer,
/* .get_base = */ ggml_backend_cuda_buffer_get_base,
/* .init_tensor = */ ggml_backend_cuda_buffer_init_tensor,
+ /* .memset_tensor = */ ggml_backend_cuda_buffer_memset_tensor,
/* .set_tensor = */ ggml_backend_cuda_buffer_set_tensor,
/* .get_tensor = */ ggml_backend_cuda_buffer_get_tensor,
/* .cpy_tensor = */ ggml_backend_cuda_buffer_cpy_tensor,
@@ -867,6 +876,7 @@ static struct ggml_backend_buffer_i ggml_backend_cuda_split_buffer_interface = {
/* .free_buffer = */ ggml_backend_cuda_split_buffer_free_buffer,
/* .get_base = */ ggml_backend_cuda_split_buffer_get_base,
/* .init_tensor = */ ggml_backend_cuda_split_buffer_init_tensor,
+ /* .memset_tensor = */ NULL,
/* .set_tensor = */ ggml_backend_cuda_split_buffer_set_tensor,
/* .get_tensor = */ ggml_backend_cuda_split_buffer_get_tensor,
/* .cpy_tensor = */ NULL,
@@ -3566,17 +3576,24 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
}
return false;
} break;
+ case GGML_OP_SILU_BACK:
+ return ggml_is_contiguous(op->src[0]) && op->src[0]->type == GGML_TYPE_F32;
+ break;
+ case GGML_OP_NORM:
+ case GGML_OP_RMS_NORM:
+ return true;
+ case GGML_OP_RMS_NORM_BACK:
+ return ggml_is_contiguous(op->src[0]) && op->ne[0] % WARP_SIZE == 0;
+ break;
case GGML_OP_NONE:
case GGML_OP_RESHAPE:
case GGML_OP_VIEW:
case GGML_OP_PERMUTE:
case GGML_OP_TRANSPOSE:
- case GGML_OP_NORM:
case GGML_OP_ADD:
case GGML_OP_MULTI_ADD:
case GGML_OP_MUL:
case GGML_OP_DIV:
- case GGML_OP_RMS_NORM:
case GGML_OP_FUSED_RMS_NORM:
case GGML_OP_SCALE:
case GGML_OP_SOFTCAP: