From bde7cd3cd949c1a85d3a199498ac98e78039d46f Mon Sep 17 00:00:00 2001 From: Radoslav Gerganov Date: Mon, 3 Jun 2024 20:03:26 +0300 Subject: llama : offload to RPC in addition to other backends (#7640) * llama : offload to RPC in addition to other backends * - fix copy_tensor being called on the src buffer instead of the dst buffer - always initialize views in the view_src buffer - add RPC backend to Makefile build - add endpoint to all RPC object names * add rpc-server to Makefile * Update llama.cpp Co-authored-by: slaren --------- Co-authored-by: slaren --- ggml-backend.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'ggml-backend.c') diff --git a/ggml-backend.c b/ggml-backend.c index 9e35ce98..05737ed6 100644 --- a/ggml-backend.c +++ b/ggml-backend.c @@ -151,7 +151,7 @@ void ggml_backend_buffer_reset(ggml_backend_buffer_t buffer) { bool ggml_backend_buffer_copy_tensor(const struct ggml_tensor * src, struct ggml_tensor * dst) { ggml_backend_buffer_t dst_buf = dst->view_src ? dst->view_src->buffer : dst->buffer; if (dst_buf->iface.cpy_tensor) { - return src->buffer->iface.cpy_tensor(dst_buf, src, dst); + return dst_buf->iface.cpy_tensor(dst_buf, src, dst); } return false; } @@ -1887,15 +1887,15 @@ ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, // utils -void ggml_backend_view_init(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor) { +void ggml_backend_view_init(struct ggml_tensor * tensor) { GGML_ASSERT(tensor->buffer == NULL); GGML_ASSERT(tensor->view_src != NULL); GGML_ASSERT(tensor->view_src->buffer != NULL); GGML_ASSERT(tensor->view_src->data != NULL); - tensor->buffer = buffer; + tensor->buffer = tensor->view_src->buffer; tensor->data = (char *)tensor->view_src->data + tensor->view_offs; - ggml_backend_buffer_init_tensor(buffer, tensor); + ggml_backend_buffer_init_tensor(tensor->buffer, tensor); } void ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr) { @@ -1954,7 +1954,7 @@ static void graph_copy_init_tensor(struct ggml_hash_set hash_set, struct ggml_te struct ggml_tensor * dst = node_copies[id]; if (dst->view_src != NULL) { graph_copy_init_tensor(hash_set, node_copies, node_init, src->view_src); - ggml_backend_view_init(dst->view_src->buffer, dst); + ggml_backend_view_init(dst); } else { ggml_backend_tensor_copy(src, dst); -- cgit v1.2.3